#### Load data
load ("to20_year.RData" )
load ("to50_year.RData" )
load ("to250_year.RData" )
load ("wd250x.RData" )
load ("to_lab.RData" )
load ("spend.RData" )
load ("thetimes.RData" )
to250_year$ dyear <- to250_year$ year + 1784
#### Create a data frame of the number of CRISIS NEWS WAVES
fifty <- c (1800 , 1850 , 1900 , 1950 , 2000 )
y25 <- c (1785 , 1800 , 1825 , 1850 , 1875 , 1900 , 1925 , 1950 , 1975 , 2000 , 2020 , 2025 , 2050 )
cnw.count <- (table (wd250x$ year))
df.cnw.count <- data.frame (year = 1785 : 2020 , cnw.count = 0 )
df.cnw.count$ cnw.count <- cnw.count[match (df.cnw.count$ year, as.numeric (names (cnw.count)))]
df.cnw.count$ cnw <- ifelse (is.na (df.cnw.count$ cnw.count), 0 , df.cnw.count$ cnw.count)
###################################
###################################
###################################
###################################
#### Test hypothesis 1.3: CNW count has increased 1785-2020
count_cnw_year <- ggplot (df.cnw.count, aes (x = year, y = cnw)) +
geom_smooth (color = "darkslategray4" , fill = "darkslategray4" , linetype = "solid" ) +
geom_smooth (method = "lm" , color = "red" , fill = "red" ) +
theme_soft () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
ylab ("Count of crisis news waves" ) +
geom_text (aes (label = year, size = cnw)) +
scale_y_log10 (breaks = c (5 , 10 , 20 , 30 , 40 , 50 ))
ggsave (count_cnw_year, file = "soft_count_cnw_year.svg" , unit = "cm" , width = 20 , height = 10 , dpi = 1200 , scale = 1.25 )
lm.cnw_count.year <- (lm (cnw ~ I (year - 1785 ), data = df.cnw.count))
summary (lm.cnw_count.year)
confint (lm.cnw_count.year)
pred.cnw_count.year <- predict (lm.cnw_count.year, newdata = data.frame (year = y25))
names (pred.cnw_count.year) <- y25
### B=.026 (.006); t=4.092; p<.001
thetimes$ cnw.count <- cnw.share[match (thetimes$ year, names (cnw.share))]
thetimes$ cl.count <- cl.share[match (thetimes$ year, cl.share$ dyear), "total.count" ]
thetimes$ cnw.count <- replace (thetimes$ cnw.count, is.na (thetimes$ cnw.count), 0 )
thetimes$ cl.count <- replace (thetimes$ cl.count, is.na (thetimes$ cl.count), 0 )
thetimes$ cnw.share <- thetimes$ cnw.count / thetimes$ articles
thetimes$ cl.share <- thetimes$ cl.count / thetimes$ articles
thetimes$ cnw_to_cl.share <- thetimes$ cnw.count / thetimes$ cl.count
thetimes$ cnw <- df.cnw.count$ cnw
#### %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ####
#### %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ####
#### %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ####
#### %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ####
#### %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ####
#### %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ####
##################################################
#### Create a data frame of the number of CRISIS NEWS WAVES
####
count_cnw_year <- ggplot (df.cnw.count, aes (x = year, y = cnw)) +
geom_point () +
geom_smooth (color = "darkslategray4" , fill = "darkslategray4" , linetype = "solid" ) +
geom_smooth (method = "lm" , color = "red" , fill = "red" ) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
ylab ("Count of crisis news waves" )
count_cnw_year2 <- ggplot (df.cnw.count, aes (x = year, y = cnw, label = year)) +
geom_point (aes (size = cnw), alpha = .2 , shape = 17 ) +
geom_text (aes (size = cnw)) +
geom_smooth (color = "darkslategray4" , fill = "darkslategray4" , linetype = "solid" ) +
geom_smooth (method = "lm" , color = "red" , fill = "red" ) +
theme_soft () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
ylab ("Count of crisis news waves" ) +
scale_y_sqrt () +
theme (legend.position = "none" )
share_cnw_year <- ggplot (thetimes, aes (x = year, y = 100 * cnw.share)) +
geom_point () +
geom_smooth (color = "darkslategray4" , fill = "darkslategray4" , linetype = "solid" ) +
geom_smooth (method = "lm" , color = "red" , fill = "red" ) +
theme_soft () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
ylab ("Share of CNW coverage" ) +
scale_y_sqrt ()
share_cl_year <- ggplot (thetimes, aes (x = year, y = 100 * cl.share)) +
geom_point () +
geom_smooth (color = "darkslategray4" , fill = "darkslategray4" , linetype = "solid" ) +
geom_smooth (method = "lm" , color = "red" , fill = "red" ) +
theme_soft () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
ylab ("Share of CL coverage" ) +
scale_y_sqrt ()
intensity_cnw <- ggplot (subset (wd250x, volume < 100 ), aes (y = intensity2, x = as.numeric (year))) +
geom_point () +
geom_smooth (color = "darkslategray4" , fill = "darkslategray4" , linetype = "solid" ) +
geom_smooth (method = "lm" , color = "red" , fill = "red" , formula = y ~ x) +
theme_soft () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
ylab ("Intensity of crisis news waves (articles per day)" ) +
xlab ("Year" ) +
scale_y_sqrt ()
volume_cnw <- ggplot (subset (wd250x, volume < 100 ), aes (y = volume2, x = as.numeric (year))) +
geom_point () +
geom_smooth (color = "darkslategray4" , fill = "darkslategray4" , linetype = "solid" ) +
geom_smooth (method = "lm" , color = "red" , fill = "red" , formula = y ~ x) +
theme_soft () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
ylab ("Volume of crisis news waves" ) +
xlab ("Year" ) +
scale_y_sqrt ()
duration_cnw <- ggplot ((wd250x), aes (y = duration, x = as.numeric (year))) +
geom_point () +
geom_smooth (color = "darkslategray4" , fill = "darkslategray4" , linetype = "solid" ) +
geom_smooth (method = "lm" , color = "red" , fill = "red" ) +
theme_soft () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
ylab ("Duration of crisis news waves" ) +
xlab ("Year" ) +
scale_y_sqrt ()
ggsave (count_cnw_year2, file = "count_cnw_year.svg" , dpi = 1200 , width = 16 , height = 8 , unit = "cm" , scale = 1.5 )
ggsave (share_cl_year, file = "share_cl_year.svg" , dpi = 1200 , width = 16 , height = 8 , unit = "cm" , scale = 1.5 )
ggsave (share_cnw_year, file = "share_cnw_year.svg" , dpi = 1200 , width = 16 , height = 8 , unit = "cm" , scale = 1.5 )
ggsave (volume_cnw, file = "volume_year.svg" , dpi = 1200 , width = 16 , height = 8 , unit = "cm" , scale = 1.5 )
ggsave (duration_cnw, file = "duration_cnw.svg" , dpi = 1200 , width = 16 , height = 8 , unit = "cm" , scale = 1.5 )
ggsave (intensity_cnw, file = "intensity_cnw.svg" , dpi = 1200 , width = 16 , height = 8 , unit = "cm" , scale = 1.5 )
search_for_K <- ggplot (subset (sk10t, variable %in% c ("exclus" , "semcoh" , "heldout" , "residual" )), aes (x = K, y = value)) +
geom_point () +
geom_line () +
geom_smooth (color = "darkslategray4" ) +
geom_vline (xintercept = 250 , color = "red" ) +
facet_wrap (. ~ variable, scales = "free_y" ) +
theme_bluewhite () +
ylab ("Value" )
ggsave (search_for_K, file = "search_for_K.svg" , dpi = 1200 , scale = 1.5 , unit = "cm" , width = 16 , height = 10 )
lm.cl_share.year <- (lm (100 * cl.share ~ I (year - 1785 ) + 0 , data = thetimes))
lm.cnw_share.year <- (lm (100 * cnw.share ~ I (year - 1785 ) + 0 , data = thetimes))
lm.cnw_count.year <- (lm (cnw ~ I (year - 1785 ), data = thetimes))
lm.cnw_volume.year <- (lm ((volume + duration * baseline30) ~ I (as.numeric (year) - 1785 ) + 0 , data = wd250x))
lm.cnw_duration.year <- (lm (duration ~ I (as.numeric (year) - 1785 ), data = wd250x))
lm.cnw_intensity.year <- (lm (I (intensity + baseline30) ~ I (as.numeric (year) - 1785 ) + 0 , data = wd250x))
ci.cl_share.year <- confint (lm.cl_share.year)
ci.cnw_share.year <- confint (lm.cnw_share.year)
ci.cnw_count.year <- confint (lm.cnw_count.year)
ci.cnw_volume.year <- confint (lm.cnw_volume.year)
ci.cnw_duration.year <- confint (lm.cnw_duration.year)
ci.cnw_intensity.year <- confint (lm.cnw_intensity.year)
pred.cl_share.year <- predict (lm.cl_share.year, newdata = data.frame (year = y25))
pred.cnw_share.year <- predict (lm.cnw_share.year, newdata = data.frame (year = y25))
pred.cnw_count.year <- predict (lm.cnw_count.year, newdata = data.frame (year = y25))
pred.cnw_volume.year <- predict (lm.cnw_volume.year, newdata = data.frame (year = y25))
pred.cnw_duration.year <- predict (lm.cnw_duration.year, newdata = data.frame (year = y25))
pred.cnw_intensity.year <- predict (lm.cnw_intensity.year, newdata = data.frame (year = y25))
names (pred.cl_share.year) <- y25
names (pred.cnw_share.year) <- y25
names (pred.cnw_count.year) <- y25
names (pred.cnw_volume.year) <- y25
names (pred.cnw_duration.year) <- y25
names (pred.cnw_intensity.year) <- y25
topic_spectrum_year <- ggplot (subset (wd250x, ! is.na (topic20)), aes (x = as.numeric (year), xmin = as.numeric (1785 + (start / 365 )), xmax = as.numeric (1785 + 2.5 * ((end - start) / 365 ) + start / 365 ), color = topic20_ord, fill = topic20_ord, ymin = topic20_num + coord - 1 , ymax = topic20_num + coord + (volume / 195 )^ (1 / 2 ) - 1 )) +
geom_rect () +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
geom_text (aes (x = 2025 , y = topic20_num - 0.5 , label = topic20_ord), hjust = 0 ) +
geom_text (aes (x = 1790 , y = topic20_num - 0.5 , label = topic20_ord), hjust = 0 ) +
scale_fill_viridis_d (begin = .1 , end = .9 ) +
scale_color_viridis_d (begin = .1 , end = .9 ) +
scale_y_continuous (breaks = seq (0 , 20 , 5 ), minor_breaks = seq (0 , 20 , 1 )) +
scale_color_manual ("Topic" , values = rep (c ("#a6cee3" , "#1f78b4" , "#b2df8a" , "#33a02c" , "#fb9a99" , "#e31a1c" , "#fdbf6f" , "#ff7f00" , "#cab2d6" , "#6a3d9a" ), times = 2 )) +
scale_fill_manual ("Topic" , values = rep (c ("#a6cee3" , "#1f78b4" , "#b2df8a" , "#33a02c" , "#fb9a99" , "#e31a1c" , "#fdbf6f" , "#ff7f00" , "#cab2d6" , "#6a3d9a" ), times = 2 )) +
ylab ("Topics" ) +
xlab ("Year" ) +
theme (axis.text.y = element_blank ())
topic_spectrum_year + geom_smooth (data = thetimes, aes (x = year, y = cnw / 2 , xmin = NULL , xmax = NULL , ymin = NULL , ymax = NULL ), color = "black" , fill = "black" )
topic_spectrum_year + geom_smooth (data = thetimes, aes (x = year, y = 100 * cnw.count / max (cnw.count), xmin = NULL , xmax = NULL , ymin = NULL , ymax = NULL ), color = "black" , fill = "black" )
ggsave (topic_spectrum_year, file = "topic_spectrum_year.svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.75 )
EPI_topic_spectrum_year <- ggplot (subset (wd250x, topic20 == "EPI" ), aes (color = topic20, fill = topic20, x = as.numeric (year), xmin = as.numeric (1785 + (start / 365 )), xmax = as.numeric (1785 + 2.5 * ((end - start) / 365 ) + start / 365 ), ymin = topic20_num + coord - 1 , ymax = topic20_num + coord + (volume / 50000 )^ (1 / 2 ) - 1 )) +
geom_rect (color = "black" , alpha = .3 ) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
geom_text (aes (x = 2025 , y = topic20_num - 0.5 , label = topic20_ord), hjust = 0 ) +
geom_text (aes (x = 1790 , y = topic20_num - 0.5 , label = topic20_ord), hjust = 0 ) +
geom_text (aes (label = event_id, y = topic20_num + coord - 1 ), hjust = 1 ) +
geom_text (aes (label = event_label, y = topic20_num + coord - 1 ), hjust = 0 ) +
scale_y_continuous (breaks = seq (0 , 20 , 5 ), minor_breaks = seq (0 , 20 , 1 )) +
ylab ("Topics" ) +
xlab ("Year" ) +
theme (axis.text.y = element_blank ()) +
scale_color_manual (values = subset (wd250x, topic20 == "EPI" )$ color20) +
scale_fill_manual (values = subset (wd250x, topic20 == "EPI" )$ color20) +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 ))
ggplotly (EPI_topic_spectrum_year)
POL_topic_spectrum_year <- ggplot (subset (wd250x, topic20 == "POL" ), aes (color = topic20, fill = topic20, x = as.numeric (year), xmin = as.numeric (1785 + (start / 365 )), xmax = as.numeric (1785 + 2.5 * ((end - start) / 365 ) + start / 365 ), ymin = topic20_num + coord - 1 , ymax = topic20_num + coord + (volume / 50000 )^ (1 / 2 ) - 1 )) +
geom_rect (color = "black" , alpha = .3 ) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
geom_text (aes (x = 2025 , y = topic20_num - 0.5 , label = topic20_ord), hjust = 0 ) +
geom_text (aes (x = 1790 , y = topic20_num - 0.5 , label = topic20_ord), hjust = 0 ) +
geom_text (aes (label = event_id, y = topic20_num + coord - 1 ), hjust = 1 ) +
geom_text (aes (label = event_label, y = topic20_num + coord - 1 ), hjust = 0 ) +
scale_y_continuous (breaks = seq (0 , 20 , 5 ), minor_breaks = seq (0 , 20 , 1 )) +
ylab ("Topics" ) +
xlab ("Year" ) +
theme (axis.text.y = element_blank ()) +
scale_color_manual (values = subset (wd250x, topic20 == "POL" )$ color20) +
scale_fill_manual (values = subset (wd250x, topic20 == "POL" )$ color20) +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 ))
ggplotly (POL_topic_spectrum_year)
TRA_topic_spectrum_year <- ggplot (subset (wd250x, topic20 == "TRA" ), aes (color = topic20, fill = topic20, x = as.numeric (year), xmin = as.numeric (1785 + (start / 365 )), xmax = as.numeric (1785 + 2.5 * ((end - start) / 365 ) + start / 365 ), ymin = topic20_num + coord - 1 , ymax = topic20_num + coord + (volume / 50000 )^ (1 / 2 ) - 1 )) +
geom_rect (color = "black" , alpha = .3 ) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
geom_text (aes (x = 2025 , y = topic20_num - 0.5 , label = topic20_ord), hjust = 0 ) +
geom_text (aes (x = 1790 , y = topic20_num - 0.5 , label = topic20_ord), hjust = 0 ) +
geom_text (aes (label = event_id, y = topic20_num + coord - 1 ), hjust = 1 ) +
geom_text (aes (label = event_label, y = topic20_num + coord - 1 ), hjust = 0 ) +
scale_y_continuous (breaks = seq (0 , 20 , 5 ), minor_breaks = seq (0 , 20 , 1 )) +
ylab ("Topics" ) +
xlab ("Year" ) +
theme (axis.text.y = element_blank ()) +
scale_color_manual (values = subset (wd250x, topic20 == "TRA" )$ color20) +
scale_fill_manual (values = subset (wd250x, topic20 == "TRA" )$ color20) +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 ))
ggplotly (TRA_topic_spectrum_year)
ECO_topic_spectrum_year <- ggplot (subset (wd250x, topic20 == "ECO" ), aes (color = topic20, fill = topic20, x = as.numeric (year), xmin = as.numeric (1785 + (start / 365 )), xmax = as.numeric (1785 + 2.5 * ((end - start) / 365 ) + start / 365 ), ymin = topic20_num + coord - 1 , ymax = topic20_num + coord + (volume / 50000 )^ (1 / 2 ) - 1 )) +
geom_rect (color = "black" , alpha = .3 ) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
geom_text (aes (x = 2025 , y = topic20_num - 0.5 , label = topic20_ord), hjust = 0 ) +
geom_text (aes (x = 1790 , y = topic20_num - 0.5 , label = topic20_ord), hjust = 0 ) +
geom_text (aes (label = paste0 (" \n\n " , event_id), y = topic20_num + coord - 1 ), hjust = 0.5 ) +
geom_text (aes (label = event_label, y = topic20_num + coord - 1 ), hjust = 0 ) +
scale_y_continuous (breaks = seq (0 , 20 , 5 ), minor_breaks = seq (0 , 20 , 1 )) +
ylab ("Topics" ) +
xlab ("Year" ) +
theme (axis.text.y = element_blank ()) +
scale_color_manual (values = subset (wd250x, topic20 == "ECO" )$ color20) +
scale_fill_manual (values = subset (wd250x, topic20 == "ECO" )$ color20)
ggplotly (ECO_topic_spectrum_year)
EPI_topic_spectrum_year <- ggplot (subset (wd250x, topic20 == "EPI" ), aes (color = topic20, fill = topic20, x = as.numeric (year), xmin = as.numeric (1785 + (start / 365 )), xmax = as.numeric (1785 + 2.5 * ((end - start) / 365 ) + start / 365 ), ymin = topic20_num + coord - 1 , ymax = topic20_num + coord + (volume / 50000 )^ (1 / 2 ) - 1 )) +
geom_rect (color = "black" , alpha = .3 ) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
geom_text (aes (x = 2025 , y = topic20_num - 0.5 , label = topic20_ord), hjust = 0 ) +
geom_text (aes (x = 1790 , y = topic20_num - 0.5 , label = topic20_ord), hjust = 0 ) +
geom_text (aes (label = event_id, y = topic20_num + coord - 1 )) +
scale_y_continuous (breaks = seq (0 , 20 , 5 ), minor_breaks = seq (0 , 20 , 1 )) +
ylab ("Topics" ) +
xlab ("Year" ) +
theme (axis.text.y = element_blank ()) +
scale_color_manual (values = subset (wd250x, topic20 == "EPI" )$ color20) +
scale_fill_manual (values = subset (wd250x, topic20 == "EPI" )$ color20)
ECO_topic_spectrum_year <- ggplot (subset (wd250x, topic20 == "ECO" ), aes (color = topic20, fill = topic20, x = as.numeric (year), xmin = as.numeric (1785 + (start / 365 )), xmax = as.numeric (1785 + 2.5 * ((end - start) / 365 ) + start / 365 ), ymin = topic20_num + coord - 1 , ymax = topic20_num + coord + (volume / 50000 )^ (1 / 2 ) - 1 )) +
geom_rect (color = "black" , alpha = .3 ) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
geom_text (aes (x = 2025 , y = topic20_num - 0.5 , label = topic20_ord), hjust = 0 ) +
geom_text (aes (x = 1790 , y = topic20_num - 0.5 , label = topic20_ord), hjust = 0 ) +
geom_text (aes (label = event_id, y = topic20_num + coord - 1 )) +
scale_y_continuous (breaks = seq (0 , 20 , 5 ), minor_breaks = seq (0 , 20 , 1 )) +
ylab ("Topics" ) +
xlab ("Year" ) +
theme (axis.text.y = element_blank ()) +
scale_color_manual (values = subset (wd250x, topic20 == "ECO" )$ color20) +
scale_fill_manual (values = subset (wd250x, topic20 == "ECO" )$ color20)
ECO_topic_spectrum_year <- ggplot (subset (wd250x, topic20 == "ECO" ), aes (x = as.numeric (year), xmin = as.numeric (1785 + (start / 365 )), xmax = as.numeric (1785 + 2.5 * ((end - start) / 365 ) + start / 365 ), color = topic20_ord, fill = topic20_ord, ymin = topic20_num + coord - 1 , ymax = topic20_num + coord + (volume / 50000 )^ (1 / 2 ) - 1 )) +
geom_rect (color = "black" , alpha = .3 ) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
geom_text (aes (x = 2025 , y = topic20_num - 0.5 , label = topic20_ord), hjust = 0 ) +
geom_text (aes (x = 1790 , y = topic20_num - 0.5 , label = topic20_ord), hjust = 0 ) +
scale_fill_viridis_d (begin = .1 , end = .9 ) +
scale_color_viridis_d (begin = .1 , end = .9 ) +
scale_y_continuous (breaks = seq (0 , 20 , 5 ), minor_breaks = seq (0 , 20 , 1 )) +
scale_color_manual ("Topic" , values = rep (c ("#a6cee3" , "#1f78b4" , "#b2df8a" , "#33a02c" , "#fb9a99" , "#e31a1c" , "#fdbf6f" , "#ff7f00" , "#cab2d6" , "#6a3d9a" ), times = 2 )) +
scale_fill_manual ("Topic" , values = rep (c ("#a6cee3" , "#1f78b4" , "#b2df8a" , "#33a02c" , "#fb9a99" , "#e31a1c" , "#fdbf6f" , "#ff7f00" , "#cab2d6" , "#6a3d9a" ), times = 2 )) +
ylab ("Topics" ) +
xlab ("Year" ) +
theme (axis.text.y = element_blank ())
POL_topic_spectrum_year <- ggplot (subset (wd250x, topic20 == "POL" ), aes (x = as.numeric (year), xmin = as.numeric (1785 + (start / 365 )), xmax = as.numeric (1785 + 2.5 * ((end - start) / 365 ) + start / 365 ), color = topic20_ord, fill = topic20_ord, ymin = topic20_num + coord - 1 , ymax = topic20_num + coord + (volume / 50000 )^ (1 / 2 ) - 1 )) +
geom_rect (color = "black" , alpha = .3 ) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
geom_text (aes (x = 2025 , y = topic20_num - 0.5 , label = topic20_ord), hjust = 0 ) +
geom_text (aes (x = 1790 , y = topic20_num - 0.5 , label = topic20_ord), hjust = 0 ) +
scale_fill_viridis_d (begin = .1 , end = .9 ) +
scale_color_viridis_d (begin = .1 , end = .9 ) +
scale_y_continuous (breaks = seq (0 , 20 , 5 ), minor_breaks = seq (0 , 20 , 1 )) +
scale_color_manual ("Topic" , values = rep (c ("#a6cee3" , "#1f78b4" , "#b2df8a" , "#33a02c" , "#fb9a99" , "#e31a1c" , "#fdbf6f" , "#ff7f00" , "#cab2d6" , "#6a3d9a" ), times = 2 )) +
scale_fill_manual ("Topic" , values = rep (c ("#a6cee3" , "#1f78b4" , "#b2df8a" , "#33a02c" , "#fb9a99" , "#e31a1c" , "#fdbf6f" , "#ff7f00" , "#cab2d6" , "#6a3d9a" ), times = 2 )) +
ylab ("Topics" ) +
xlab ("Year" ) +
theme (axis.text.y = element_blank ())
DIS_topic_spectrum_year <- ggplot (subset (wd250x, topic20 == "DIS" ), aes (x = as.numeric (year), xmin = as.numeric (1785 + (start / 365 )), xmax = as.numeric (1785 + 2.5 * ((end - start) / 365 ) + start / 365 ), color = topic20_ord, fill = topic20_ord, ymin = topic20_num + coord - 1 , ymax = topic20_num + coord + (volume / 50000 )^ (1 / 2 ) - 1 )) +
geom_rect (color = "black" , alpha = .3 ) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
geom_text (aes (x = 2025 , y = topic20_num - 0.5 , label = topic20_ord), hjust = 0 ) +
geom_text (aes (x = 1790 , y = topic20_num - 0.5 , label = topic20_ord), hjust = 0 ) +
scale_fill_viridis_d (begin = .1 , end = .9 ) +
scale_color_viridis_d (begin = .1 , end = .9 ) +
scale_y_continuous (breaks = seq (0 , 20 , 5 ), minor_breaks = seq (0 , 20 , 1 )) +
scale_color_manual ("Topic" , values = rep (c ("#a6cee3" , "#1f78b4" , "#b2df8a" , "#33a02c" , "#fb9a99" , "#e31a1c" , "#fdbf6f" , "#ff7f00" , "#cab2d6" , "#6a3d9a" ), times = 2 )) +
scale_fill_manual ("Topic" , values = rep (c ("#a6cee3" , "#1f78b4" , "#b2df8a" , "#33a02c" , "#fb9a99" , "#e31a1c" , "#fdbf6f" , "#ff7f00" , "#cab2d6" , "#6a3d9a" ), times = 2 )) +
ylab ("Topics" ) +
xlab ("Year" ) +
theme (axis.text.y = element_blank ())
eco.cnw.count <- data.frame (table (wd250x$ year, wd250x$ topic20 == "ECO" ))
df.eco.cnw.count <- data.frame (year = c (1785 : 2020 , 1785 : 2020 ), topic = rep (c ("ECO" , "non-ECO" ), each = 236 ), cnw.count = 0 )
df.eco.cnw.count$ cnw.count[1 : 236 ] <- subset (eco.cnw.count, Var2 == TRUE )[match (subset (df.eco.cnw.count, topic == "ECO" )$ year, subset (eco.cnw.count, Var2 == TRUE )$ Var1), "Freq" ]
df.eco.cnw.count$ cnw.count[237 : 472 ] <- subset (eco.cnw.count, Var2 == FALSE )[match (subset (df.eco.cnw.count, topic == "non-ECO" )$ year, subset (eco.cnw.count, Var2 == FALSE )$ Var1), "Freq" ]
ggplot (df.eco.cnw.count, aes (y = cnw.count, x = year, color = topic, fill = topic, shape = topic)) +
geom_point () +
geom_smooth ()
##################################################
#### Volume of CRISIS NEWS WAVES
####
ggplot (subset (wd250x, volume < 100 ), aes (y = volume, x = as.numeric (year))) +
geom_point () +
geom_smooth (color = "darkslategray4" , fill = "darkslategray4" , linetype = "solid" ) +
geom_smooth (method = "lm" , color = "red" , fill = "red" , formula = y ~ x) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
ylab ("Volume of crisis news waves" )
summary (lm (volume ~ as.numeric (year), data = wd250x))
### B=.044 (.004); t=12.34; p<.001
ggplot (subset (wd250x, volume < 100 ), aes (y = volume, x = as.numeric (year))) +
geom_point () +
geom_smooth (color = "darkslategray4" , fill = "darkslategray4" , linetype = "solid" ) +
geom_smooth (method = "lm" , color = "red" , fill = "red" ) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
facet_wrap (. ~ topic20) +
ylim (0 , 10 )
ggplot (subset (wd250x, volume < 100 ), aes (y = volume, x = as.numeric (year))) +
geom_point () +
geom_smooth (color = "darkslategray4" , fill = "darkslategray4" , linetype = "solid" ) +
geom_smooth (method = "lm" , color = "red" , fill = "red" ) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
facet_wrap (. ~ topic20 == "ECO" ) +
ylim (0 , 10 )
ggplot ((wd250x), aes (y = volume, x = as.numeric (year), shape = (topic20 == "ECO" ), color = (topic20 == "ECO" ))) +
geom_point () +
geom_smooth () +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
ylim (0 , 10 )
##################################################
#### Duration of CRISIS NEWS WAVES
####
ggplot ((wd250x), aes (y = duration, x = as.numeric (year))) +
geom_point () +
geom_smooth (color = "darkslategray4" , fill = "darkslategray4" , linetype = "solid" ) +
geom_smooth (method = "lm" , color = "red" , fill = "red" ) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
ylab ("Duration of crisis news waves" )
summary (lm (duration ~ as.numeric (year), data = wd250x))
### B=.840 (.042); t=19.83; p<.001
ggplot ((wd250x), aes (y = duration, x = as.numeric (year))) +
geom_point () +
geom_smooth (color = "darkslategray4" , fill = "darkslategray4" , linetype = "solid" ) +
geom_smooth (method = "lm" , color = "red" , fill = "red" ) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
facet_wrap (. ~ topic20) +
ylim (0 , 500 )
ggplot ((wd250x), aes (y = duration, x = as.numeric (year))) +
geom_point () +
geom_smooth (color = "darkslategray4" , fill = "darkslategray4" , linetype = "solid" ) +
geom_smooth (method = "lm" , color = "red" , fill = "red" ) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
facet_wrap (. ~ topic20 == "ECO" ) +
ylim (0 , 500 )
ggplot ((wd250x), aes (y = duration, x = as.numeric (year), shape = (topic20 == "ECO" ), color = (topic20 == "ECO" ))) +
geom_point () +
geom_smooth () +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
ylim (0 , 500 )
##################################################
#### Intensity of CRISIS NEWS WAVES
####
ggplot (subset (wd250x, intensity < 0.4 ), aes (y = intensity, x = as.numeric (year))) +
geom_point () +
geom_smooth (color = "darkslategray4" , fill = "darkslategray4" , linetype = "solid" ) +
geom_smooth (method = "lm" , color = "red" , fill = "red" ) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 ))
summary (lm (intensity ~ as.numeric (year), data = wd250x))
### B=.00014487 (.00001336); t=10.846; p<.001
## News volume
newsvolume <- data.frame (year = thetimes$ year, articles = thetimes$ articles)
editors <- data.frame (
start = c (1785 , 1803 , 1812 , 1817 , 1841 , 1877 , 1884 , 1912 , 1919 , 1919 , 1923 , 1941 , 1948 , 1952 , 1967 , 1981 , 1982 , 1985 , 1990 , 1992 , 2002 , 2007 , 2013 ),
end = c (1803 , 1812 , 1817 , 1841 , 1877 , 1884 , 1912 , 1919 , 1919 , 1923 , 1941 , 1948 , 1952 , 1967 , 1981 , 1982 , 1985 , 1990 , 1992 , 2002 , 2007 , 2013 , 2020 ),
editor = c ("Walter I" , "Walter II" , "Stoddard" , "Barnes" , "Delane" , "Chenery" , "Buckle" , "Dawson" , "Freeman" , "Steed" , "Dawson" , "Barrington-Ward" , "Casey" , "Haley" , "Rees-Mogg" , "Evans" , "Douglas-Home" , "Wilson" , "Jenkins" , "Stothard" , "Thomson" , "Harding" , "Witherow" )
)
owners <- data.frame (
start = c (1785 , 1803 , 1847 , 1894 , 1908 , 1922 , 1959 , 1966 , 1976 , 1981 ),
end = c (1803 , 1847 , 1894 , 1908 , 1922 , 1959 , 1966 , 1976 , 1981 , 2020 ),
owner = c ("Walter I" , "Walter II" , "Walter III" , "Walter IV" , "Harmsworth" , "Astor I" , "Astor II" , "Thomson I" , "Thomson II" , "Murdoch" )
)
events <- data.frame (
start = c (1803 , 1914 , 1939 , 1935 , 1887 , 1920 , 1814 , 1844 , 1838 , 1848 , 1860 , 1978 ),
end = c (1815 , 1918 , 1945 , 1938 , 1888 , 1921 , 1815 , 1845 , 1839 , 1870 , 1866 , 1979 ),
event = c ("Napoleonic Wars" , "World War I" , "World War II" , "Appeasement" , "Piggott forgeries" , "Zion hoax" , "Hi-speed steam press" , "Rotary press" , "London-Birmingham Postal Railway" , "Telegraph network established" , "'Walter Press'" , "Strike" )
)
prices <- data.frame (
year = c (1788 , 1797 , 1805 , 1814 , 1824 , 1838 , 1847 , 1857 , 1865 , 1873 , 1882 , 1890 , 1907 , 1921 , 1931 , 1939 , 1950 , 1973 , 1980 , 1990 , 2000 , 2007 , 2014 , 2020 ),
price = c (3 / 240 , 6 / 240 , 6 / 240 , 6.5 / 240 , 7 / 240 , 5 / 240 , 5 / 240 , 4 / 240 , 3 / 240 , 3 / 240 , 3 / 240 , 3 / 240 , 3 / 240 , 5 / 240 , 4 / 240 , 2.5 / 240 , 3 / 240 , 3 / 100 , 20 / 100 , 35 / 100 , 35 / 100 , 65 / 100 , 120 / 100 , 220 / 100 )
)
spend$ GDP_deflator_i <- as.numeric (c (rep (1.41 , times = 11 ), spend$ GDP.Deflator[12 : 230 ], rep (106.00 , times = 2 )))
prices$ realprice <- 100 * prices$ price / spend[match (prices$ year, spend$ Year), "GDP_deflator_i" ]
circulation <- data.frame (
year = c (1815 , 1852 , 1910 , 1921 , 1930 , 1939 , 1947 , 1956 , 1966 , 1976 , 1980 , 1992 , 2000 , 2005 , 2010 , 2015 , 2019 ),
circulation = c (5000 , 42384 , 45000 , 113000 , 187000 , 204000 , 268000 , 220716 , 282000 , 310000 , 297000 , 386258 , 726349 , 686327 , 508250 , 396621 , 417298 )
)
senseless.topics <- aggregate (long.STM$ pr, by = list (long.STM$ year, is.na (long.STM$ area)), FUN = "sum" )
senseless.topics$ total <- rep (tapply (senseless.topics[, "x" ], senseless.topics$ Group.1 , "sum" ), times = 2 )
senseless.topics$ share <- senseless.topics$ x / senseless.topics$ total
gg.senseless.topics <- ggplot (senseless.topics, aes (y = 100 * share, x = as.numeric (Group.1 ), fill = Group.2 )) +
geom_area (position = "stack" ) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 )) +
scale_color_viridis_d () +
scale_fill_viridis_d (begin = .2 , end = .8 , labels = c ("Interpretable" , "Uninterpretable" )) +
xlab ("Year" ) +
ylab ("Share of non-interpretable issues" ) +
guides (fill = guide_legend ("Interpretability \n of topic" )) +
theme (legend.position = c (0.7 , 0.7 ))
ggplot (subset (senseless.topics, Group.2 == TRUE ), aes (y = 100 * share, x = as.numeric (Group.1 ))) +
geom_point () +
geom_smooth () +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 ))
ggsave (gg.senseless.topics, file = "senseless.svg" , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
gg.threshold <- ggplot (newsvolume, aes (y = threshold2, x = year)) +
geom_vline (xintercept = fifty, color = "lightblue3" , size = 1.15 ) +
geom_point (color = "#7d3c98" ) +
geom_line (color = "#7d3c98" ) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 )) +
scale_color_viridis_d () +
scale_fill_viridis_d (begin = .2 , end = .8 ) +
xlab ("Year" ) +
ylab ("Thresdhold of volume for \n detection of crisis events" ) +
guides (fill = guide_legend ("Minimum wave volume" ))
ggsave (gg.threshold, file = "threshold.svg" , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
coverage.volume.trajectory <- ggplot () +
geom_vline (xintercept = c (1800 , 1850 , 1900 , 1950 , 2000 ), color = "lightblue" , size = 1 ) +
geom_point (data = newsvolume, aes (y = articles, x = year), color = "black" ) +
geom_smooth (data = newsvolume, aes (y = articles, x = year), color = "dodgerblue" ) +
geom_point (data = circulation, aes (y = circulation / 7 , x = year), shape = 15 , size = 5 , color = "#888888" ) +
geom_point (data = prices, aes (y = realprice * 50000 , x = year), shape = 16 , size = 5 , color = "hotpink" ) +
geom_text (data = circulation, aes (y = circulation / 7 , x = year + 2 , label = circulation), color = "#888888" , hjust = 0 ) +
geom_rect (data = editors, aes (ymin = 1000 , ymax = 3000 , xmin = start, xmax = end, fill = editor), color = "white" ) +
geom_text (data = editors, aes (y = c (rep (c (- 1000 , - 7000 , - 4000 , - 10000 ), times = 5 ), - 7000 , - 10000 , - 4000 ), x = start, label = editor), hjust = 0 ) +
geom_rect (data = owners, aes (ymin = 140000 , ymax = 142000 , xmin = start, xmax = end, fill = owner), color = "white" ) +
geom_text (data = owners, aes (y = c (rep (c (138000 , 132000 , 135000 , 129000 ), times = 2 ), 138000 , 132000 ), x = start, label = owner), hjust = 0 ) +
geom_rect (data = events, aes (ymin = 120000 , ymax = 125000 , xmin = start, xmax = end, fill = event), color = "white" ) +
geom_text (data = events, aes (y = c (118000 , 118000 , 118000 , 115000 , 115000 , 115000 , 112000 , 118000 , 109000 , 106000 , 112000 , 112000 ), x = start, label = event), hjust = 0 ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 )) +
theme_bluewhite () +
theme (legend.position = "none" )
coverage.volume.trajectory
coverage.volume.trajectory <- ggplot () +
geom_vline (xintercept = c (1800 , 1850 , 1900 , 1950 , 2000 ), color = "lightblue" , size = 1 ) +
geom_point (data = newsvolume, aes (y = articles, x = year), color = "black" ) +
geom_smooth (data = newsvolume, aes (y = articles, x = year), color = "dodgerblue" ) +
geom_point (data = circulation, aes (y = circulation / 7 , x = year), shape = 15 , size = 4 , color = "#888888" ) +
geom_point (data = prices, aes (y = realprice * 50000 , x = year), shape = 16 , size = 4 , color = "hotpink" ) +
geom_text (data = prices, aes (y = realprice * 50000 , x = year, label = round (realprice, 2 )), size = 4 , color = "hotpink" , nudge_y = 4000 ) +
geom_text (data = circulation, aes (y = circulation / 7 , x = year + 2 , label = circulation), color = "#888888" , hjust = 0 ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 )) +
theme_bluewhite () +
theme (legend.position = "none" ) +
annotate ("text" , size = 5 , x = c (1850 , 1930 , 1825 ), y = c (25000 , 0 , 120000 ), color = c ("black" , "darkgrey" , "hotpink" ), label = c ("total stories published" , "total circulation" , "price (inflation adjusted)" ), fontface = "bold" )
coverage.volume.trajectory
ggsave (coverage.volume.trajectory, file = "cov_vol_traj.svg" , unit = "cm" , width = 16 , height = 12 , scale = 1.5 )
backgrounds <- ggplot () +
geom_vline (xintercept = c (1800 , 1850 , 1900 , 1950 , 2000 ), color = "lightblue" , size = 1 ) +
geom_rect (data = editors, aes (ymin = 1000 , ymax = 3000 , xmin = start, xmax = end, fill = editor), color = "white" ) +
geom_text (data = editors, aes (y = c (rep (c (- 1000 , - 7000 , - 4000 , - 10000 ), times = 5 ), - 7000 , - 10000 , - 4000 ), x = start, label = editor), hjust = 0 ) +
geom_rect (data = owners, aes (ymin = 40000 , ymax = 42000 , xmin = start, xmax = end, fill = owner), color = "white" ) +
geom_text (data = owners, aes (y = c (rep (c (38000 , 32000 , 35000 , 29000 ), times = 2 ), 38000 , 32000 ), x = start, label = owner), hjust = 0 ) +
geom_rect (data = events, aes (ymin = 20000 , ymax = 25000 , xmin = start, xmax = end, fill = event), color = "white" ) +
geom_text (data = events, aes (y = c (18000 , 18000 , 18000 , 15000 , 15000 , 15000 , 12000 , 18000 , 9000 , 6000 , 12000 , 12000 ), x = start, label = event), hjust = 0 ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 )) +
theme_bluewhite () +
theme (legend.position = "none" , axis.text.y = element_text (color = "lightskyblue1" )) +
ylab ("Events" ) +
annotate ("text" , fontface = "bold" , x = c (3000 ))
backgrounds
cov.vol.traj <- ggarrange (coverage.volume.trajectory, backgrounds, heights = c (2 , 1 ))
ggsave (cov.vol.traj, file = "cov_vol_traj.svg" , unit = "cm" , width = 16 , height = 8 , scale = 3 )
uniq_count <- function (x) {
return (length (unique (x)))
}
cl <- data.frame (table (wide.STM$ year))
cl_time <- ggplot (cl, aes (x = as.numeric (as.character (Var1)), y = Freq)) +
geom_vline (xintercept = fifty, color = "lightblue3" , size = 1.5 ) +
geom_point () +
geom_smooth (span = 0.5 , color = "steelblue" ) +
theme_bluewhite () +
xlab ("Year" ) +
ylab ("Count of news articles with CL" ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 ))
ggsave (cl_time, file = "cl_time.svg" , device = "svg" , unit = "cm" , width = 16 , height = 8 , dpi = 1200 , scale = 1.25 )
eventcount <- aggregate (wavedata.vol50$ TOPIC, by = list (wavedata.vol50$ year), FUN = length)
unique_eventcount <- aggregate (wavedata.vol50$ TOPIC, by = list (wavedata.vol50$ year), FUN = uniq_count)
names (unique_eventcount) <- c ("year" , "UniqueTopicsCovered" )
eventcount_decade <- aggregate (wavedata.vol50$ TOPIC, by = list (wavedata.vol50$ decade), FUN = length)
unique_eventcount_decade <- aggregate (wavedata.vol50$ TOPIC, by = list (wavedata.vol50$ decade), FUN = uniq_count)
names (unique_eventcount_decade) <- c ("decade" , "UniqueTopicsCovered" )
eventcount_decade_area <- aggregate (wavedata.vol50$ AREA, by = list (wavedata.vol50$ decade), FUN = length)
unique_eventcount_decade_area <- aggregate (wavedata.vol50$ AREA, by = list (wavedata.vol50$ decade), FUN = uniq_count)
names (unique_eventcount_decade_area) <- c ("decade" , "UniqueAreasCovered" )
eventcount_decade_area2 <- aggregate (wavedata.vol50$ AREA2, by = list (wavedata.vol50$ decade), FUN = length)
unique_eventcount_decade_area2 <- aggregate (wavedata.vol50$ AREA2, by = list (wavedata.vol50$ decade), FUN = uniq_count)
names (unique_eventcount_decade_area2) <- c ("decade" , "UniqueAreasCovered" )
decade.STM_area2b <- subset (decade.STM_area2, ! area == "Epidemics" & ! area == "Location" )
decade.STM_area2b[decade.STM_area2b$ area == "Epidemic" , "articles" ] <- decade.STM_area2[decade.STM_area2$ area == "Epidemic" , "articles" ] + decade.STM_area2[decade.STM_area2$ area == "Epidemics" , "articles" ]
decade.STM_area2b[decade.STM_area2b$ area == "Geopolitical" , "articles" ] <- decade.STM_area2[decade.STM_area2$ area == "Geopolitical" , "articles" ] + decade.STM_area2[decade.STM_area2$ area == "Location" , "articles" ]
decade.STM_area2b$ total <- tapply (decade.STM_area2b$ articles, decade.STM_area2b$ decade, sum, na.rm = TRUE )
decade.STM_area2b$ share <- decade.STM_area2b$ articles / decade.STM_area2b$ total
year.STM <- aggregate (long.STM$ pr, by = list (long.STM$ year, long.STM$ area2), FUN = "sum" , na.rm = TRUE )
names (year.STM) <- c ("year" , "area2" , "count" )
year.STM$ total <- newsvolume$ articles[match (year.STM$ year, newsvolume$ year)]
year.STM$ total <- tapply (year.STM$ count, year.STM$ year, "sum" , na.rm = TRUE )
year.STM$ share <- year.STM$ count / year.STM$ total
year.STM2 <- subset (year.STM, ! area2 == "Epidemics" & ! area2 == "Location" )
year.STM2[year.STM2$ area2 == "Epidemic" , "share" ] <- year.STM[year.STM$ area2 == "Epidemic" , "share" ] + year.STM[year.STM$ area2 == "Epidemics" , "share" ]
year.STM2[year.STM2$ area2 == "Epidemic" , "count" ] <- year.STM[year.STM$ area2 == "Epidemic" , "count" ] + year.STM[year.STM$ area2 == "Epidemics" , "count" ]
year.STM2[year.STM2$ area2 == "Geopolitical" , "share" ] <- year.STM[year.STM$ area2 == "Geopolitical" , "share" ] + year.STM[year.STM$ area2 == "Location" , "share" ]
year.STM2[year.STM2$ area2 == "Geopolitical" , "count" ] <- year.STM[year.STM$ area2 == "Geopolitical" , "count" ] + year.STM[year.STM$ area2 == "Location" , "count" ]
share_CL_traj <- ggplot (year.STM2, aes (y = 100 * share, x = as.numeric (year), color = area2, fill = area2)) +
geom_vline (xintercept = fifty, color = "lightblue3" , size = 1.15 ) +
geom_hline (yintercept = 0 , color = "red" ) +
geom_smooth () +
geom_point () +
facet_wrap (~ area2, scales = "free_y" , ncol = 3 ) +
theme_bluewhite () +
scale_color_viridis_d () +
scale_fill_viridis_d () +
theme (legend.position = "none" ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
ylab ("Share of crisis labelling" ) +
xlab ("Year" )
share_CL_traj_share <- ggplot (decade.STM_area2b, aes (y = 100 * share, x = as.numeric (decade), fill = area)) +
geom_vline (xintercept = fifty, color = "lightblue3" , size = 1.15 ) +
geom_area (color = "white" ) +
theme_bluewhite () +
scale_color_viridis_d () +
scale_fill_viridis_d () +
theme (legend.position = "bottom" ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
ylab ("Share of crisis labelling" ) +
xlab ("Year" )
share_CL_traj_abs <- ggplot (decade.STM_area2b, aes (y = articles, x = as.numeric (decade), fill = area)) +
geom_vline (xintercept = fifty, color = "lightblue3" , size = 1.15 ) +
geom_area () +
theme_bluewhite () +
scale_color_viridis_d () +
scale_fill_viridis_d () +
theme (legend.position = "bottom" ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
ylab ("Number of articles with crisis labelling" ) +
xlab ("Year" )
share_CL_traj_share <- ggplot (decade.STM_area2b, aes (y = 100 * share, x = as.numeric (decade), fill = area)) +
geom_area (color = "white" , show.legend = FALSE ) +
geom_vline (xintercept = fifty, color = "lightblue3" , size = 1.15 ) +
theme_bluewhite () +
scale_color_viridis_d () +
scale_fill_viridis_d () +
theme (legend.position = "bottom" ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 20 )) +
ylab ("Share of crisis labelling" ) +
xlab ("Year" ) +
annotate ("text" ,
x =
c (1900 , 1980 , 2008 , 1973 , 1850 , 2000 , 1800 , 1900 , 1830 , 2010 , 1820 , 2000 , 2000 , 1805 , 2020 , 2000 , 1870 , 2015 , 1860 , 2015 ), y =
c (95 , 85 , 65 , 63 , 75 , 57 , 75 , 50 , 30 , 22 , 17 , 17 , 12 , 8 , 8.5 , 5.5 , 6 , 3 , 2.5 , 1 ), label =
c ("Disaster" , "Economic" , "Education" , "Energy" , "Epidemic" , "Family" , "Functional" , "Geopolitical" , "Government" , "Health" , "Justice" , "Labor" , "Leisure" , "Military" , "Public" , "Science" , "Society" , "Technology" , "Transport" , "Welfare" ), color = c (rep (c ("white" , "black" ), each = 10 ))
)
ggsave (share_CL_traj, file = "share_CL_traj.svg" , units = "cm" , width = 16 , height = 20 , dpi = 1200 , scale = 2 )
ggsave (share_CL_traj_share, file = "share_CL_traj_share.svg" , units = "cm" , width = 16 , height = 10 , dpi = 1200 , scale = 1.75 )
ggsave (share_CL_traj_abs, file = "share_CL_traj_abs.svg" , units = "cm" , width = 16 , height = 12 , dpi = 1200 , scale = 2 )
gg_topicspectrum.event.decades <- ggplot (unique_eventcount_decade, aes (x = decade, y = UniqueTopicsCovered)) +
geom_vline (xintercept = c (1800 , 1850 , 1900 , 1950 , 2000 ), color = "lightblue" , size = 1 ) +
geom_point (size = 2.5 ) +
geom_smooth (method = "lm" , fill = "#44aa66" , color = "#44aa66" ) +
geom_smooth (fill = "#dd1c77" , color = "#dd1c77" , linetype = "longdash" ) +
geom_hline (yintercept = 106 , color = "red" ) +
theme_bluewhite () +
ylim (0 , 106 ) +
ylab ("Topics covered by crisis events in the decade" ) +
xlab ("Decade" ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 ))
gg_areaspectrum.event.decades <- ggplot (unique_eventcount_decade_area, aes (x = decade, y = UniqueAreasCovered)) +
geom_vline (xintercept = c (1800 , 1850 , 1900 , 1950 , 2000 ), color = "lightblue" , size = 1 ) +
geom_point (size = 2.5 ) +
geom_smooth (method = "lm" , fill = "#44aa66" , color = "#44aa66" ) +
geom_smooth (fill = "#dd1c77" , color = "#dd1c77" , linetype = "longdash" ) +
geom_hline (yintercept = 22 , color = "red" ) +
theme_bluewhite () +
ylim (0 , 22 ) +
ylab ("Areas covered by crisis events in the decade" ) +
xlab ("Decade" ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 ))
gg_areaspectrum2.event.decades <- ggplot (unique_eventcount_decade_area2, aes (x = decade, y = UniqueAreasCovered)) +
geom_vline (xintercept = c (1800 , 1850 , 1900 , 1950 , 2000 ), color = "lightblue" , size = 1 ) +
geom_point (size = 2.5 ) +
geom_smooth (method = "lm" , fill = "#44aa66" , color = "#44aa66" ) +
geom_smooth (fill = "#dd1c77" , color = "#dd1c77" , linetype = "longdash" ) +
geom_hline (yintercept = 48 , color = "red" ) +
theme_bluewhite () +
ylim (0 , 48 ) +
ylab ("Areas covered by crisis events in the decade" ) +
xlab ("Decade" ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 ))
ggsave (gg_topicspectrum.event.decades, file = "topicspectrum_event_decade.svg" , units = "cm" , width = 16 , height = 8 , dpi = 1200 , scale = 1.5 )
ggsave (gg_areaspectrum.event.decades, file = "areaspectrum_event_decade.svg" , units = "cm" , width = 16 , height = 8 , dpi = 1200 , scale = 1.5 )
ggsave (gg_areaspectrum2.event.decades, file = "area2spectrum_event_decade.svg" , units = "cm" , width = 16 , height = 8 , dpi = 1200 , scale = 1.5 )
crisisarticles <- aggregate (wavedata.vol50$ volume, by = list (wavedata.vol50$ year), FUN = sum)
crisisarticles$ year <- as.numeric (crisisarticles$ Group.1 )
crisisarticles$ volume <- crisisarticles$ x
crisisarticles$ CNW <- crisisarticles$ volume
crisisarticles$ CL <- crisisarticles$ volume
crisisarticles$ Coverage <- crisisarticles$ volume
crisis.esc.stages <- data.frame (year = seq (1788 , 2020 , 1 ))
crisis.esc.stages$ CNW <- crisisarticles$ CNW[match (crisis.esc.stages$ year, crisisarticles$ year)]
crisis.esc.stages$ CL <- newsvolume$ crisis[match (crisis.esc.stages$ year, newsvolume$ year)]
crisis.esc.stages$ Coverage <- newsvolume$ articles[match (crisis.esc.stages$ year, newsvolume$ year)]
crisis.esc.stages$ CNWtoCoverage <- crisis.esc.stages$ CNW / crisis.esc.stages$ Coverage
crisis.esc.stages$ CLtoCoverage <- crisis.esc.stages$ CL / crisis.esc.stages$ Coverage
crisis.esc.stages$ CNWtoCL <- crisis.esc.stages$ CNW / crisis.esc.stages$ CL
crisis.esc.stages$ Coverage100 <- crisis.esc.stages$ Coverage / max (crisis.esc.stages$ Coverage)
crisis.esc.stages$ CL100 <- crisis.esc.stages$ CL / max (crisis.esc.stages$ CL)
crisis.esc.stages$ CNW100 <- crisis.esc.stages$ CNW / max (crisis.esc.stages$ CNW, na.rm = TRUE )
ggplot (crisis.esc.stages, aes (x = year)) +
geom_point (aes (y = Coverage100), color = "blue" ) +
geom_point (aes (y = CL100), color = "green" ) +
geom_point (aes (y = CNW100), color = "red" )
CL.time <- ggplot (crisis.esc.stages, aes (x = year, y = 100 * CLtoCoverage)) +
geom_vline (xintercept = fifty, color = "lightsteelblue" , size = 1 ) +
geom_point () +
geom_smooth (span = 0.25 , color = "steelblue" , fill = "steelblue" ) +
theme_bluewhite () +
ylim (0 , 5 ) +
ylab ("Share of coverage with crisis labelling (CL)" ) +
xlab ("Year" ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 ))
CL.time.small <- ggplot (crisis.esc.stages, aes (x = year, y = 100 * CLtoCoverage)) +
geom_vline (xintercept = fifty, color = "lightsteelblue" , size = 1 ) +
geom_point () +
geom_smooth (span = 0.25 , color = "steelblue" , fill = "steelblue" ) +
theme_bluewhite () +
ylim (0 , 5 ) +
ylab ("Share of news stories \n with crisis labelling \n (% of total news coverage)" ) +
xlab ("Year" ) +
scale_x_continuous (breaks = seq (1800 , 2000 , 50 ))
ggsave (CL.time.small, file = "CL_time_small.svg" , device = "svg" , dpi = 1200 , unit = "cm" , width = 4.4 , height = 2.2 , scale = 3.00 )
ggsave (CL.time, file = "CL_time.svg" , device = "svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
CNW.time <- ggplot (crisis.esc.stages, aes (x = year, y = 100 * CNWtoCoverage)) +
geom_vline (xintercept = fifty, color = "lightsteelblue" , size = 1 ) +
geom_point () +
geom_smooth (span = 0.25 , color = "steelblue" , fill = "steelblue" ) +
theme_bluewhite () +
ylab ("Share of coverage that is part of crisis news waves (CNWs)" ) +
xlab ("Year" ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 ))
ggsave (CNW.time, file = "CNW_time.svg" , device = "svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
crisisarticles.time <- ggplot (crisisarticles, aes (y = volume, x = year, label = year, size = (volume))) +
geom_vline (xintercept = c (1800 , 1850 , 1900 , 1950 , 2000 ), color = "lightblue" , size = 1 ) +
geom_point (color = "gray" , shape = 17 ) +
geom_text (color = "black" ) +
geom_smooth (linetype = "longdash" , fill = "#44aa66" , color = "#44aa66" , method = "lm" , na.rm = TRUE ) +
geom_smooth (fill = "#dd1c77" , color = "#dd1c77" , na.rm = TRUE ) +
xlab ("Year" ) +
ylab ("Number of articles assigned to crisis events per year" ) +
theme_bluewhite () +
theme (legend.position = "none" )
crisisarticles.time <- ggplot (crisisarticles, aes (y = volume, x = year, label = year, size = (volume))) +
geom_vline (xintercept = c (1800 , 1850 , 1900 , 1950 , 2000 ), color = "lightblue3" , size = 1 ) +
geom_point (color = "gray" , shape = 17 ) +
geom_text (color = "black" ) +
geom_smooth (linetype = "longdash" , fill = "#44aa66" , color = "#44aa66" , method = "lm" , na.rm = TRUE ) +
geom_smooth (fill = "#dd1c77" , color = "#dd1c77" , na.rm = TRUE ) +
xlab ("Year" ) +
ylab ("Number of articles assigned to crisis events per year" ) +
theme_bluewhite () +
theme (legend.position = "none" ) +
scale_y_continuous (trans = "log10" ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 )) +
annotation_logticks ()
crisisarticles.lm <- lm (volume ~ year, data = crisisarticles)
predict (crisisarticles.lm, newdata = list (year = c (1800 , 1850 , 1900 , 1950 , 2000 , 2050 )))
ggsave (crisisarticles.time, file = "crisisarticles+time.svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.5 )
eventcount <- aggregate (wavedata.vol50$ duration / wavedata.vol50$ duration, by = list (wavedata.vol50$ year), FUN = sum)
eventcount$ year <- as.numeric (eventcount$ Group.1 )
eventcount$ count <- eventcount$ x
eventcount.time <- ggplot (eventcount, aes (y = count, x = year, label = year, size = count)) +
geom_vline (xintercept = c (1800 , 1850 , 1900 , 1950 , 2000 ), color = "lightblue3" , size = 1 ) +
geom_point (shape = 17 , color = "gray" ) +
geom_text (color = "black" ) +
geom_smooth (linetype = "longdash" , fill = "#44aa66" , color = "#44aa66" , method = "lm" , na.rm = TRUE ) +
geom_smooth (fill = "#dd1c77" , color = "#dd1c77" , na.rm = TRUE , span = .25 ) +
xlab ("Year" ) +
ylab ("Number of crisis events identified" ) +
theme_bluewhite () +
theme (legend.position = "none" ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 ))
eventcount.time.small <- ggplot (eventcount, aes (y = count, x = year, label = year, size = count)) +
geom_vline (xintercept = c (1800 , 1850 , 1900 , 1950 , 2000 ), color = "lightblue3" , size = 1 ) +
geom_point (shape = 17 , color = "gray" ) +
geom_text (color = "black" ) +
geom_smooth (linetype = "longdash" , fill = "#44aa66" , color = "#44aa66" , method = "lm" , na.rm = TRUE ) +
geom_smooth (fill = "#dd1c77" , color = "#dd1c77" , na.rm = TRUE , span = .25 ) +
xlab ("Year" ) +
ylab ("Number of crisis \n news waves identified" ) +
theme_bluewhite () +
theme (legend.position = "none" ) +
scale_x_continuous (breaks = seq (1800 , 2000 , 50 ))
ggsave (eventcount.time.small, file = "eventcount_time_small.svg" , device = "svg" , dpi = 1200 , unit = "cm" , width = 4.4 , height = 2.2 , scale = 3.00 )
wavedata.vol50 <- wd250x
wavedata.vol50$ YEAR <- as.numeric (wavedata.vol50$ year)
wavedata.vol50$ AREA_i <- to.lab[match (wavedata.vol50$ topic250, to.lab$ topic250), "topic20" ]
areas.by.year <- tapply (wavedata.vol50$ YEAR, wavedata.vol50$ AREA_i, min, na.rm = TRUE )
wavedata.vol50$ AREA_f <- factor (wavedata.vol50$ AREA_i, levels = names (areas.by.year)[order (areas.by.year)], ordered = TRUE )
inf <- c (plasma (n = 10 , alpha = 1 , begin = 0.1 , end = 0.9 , direction = - 1 ))
infr <- c (viridis (n = 10 , alpha = 1 , begin = 0.1 , end = 0.9 , direction = 1 ))
infscale <- c (inf[1 ], infr[1 ], inf[2 ], infr[2 ], inf[3 ], infr[3 ], inf[4 ], infr[4 ], inf[5 ], infr[5 ], inf[6 ], infr[6 ], inf[7 ], infr[7 ], inf[8 ], infr[8 ], inf[9 ], infr[9 ], inf[10 ])
eventscope <- ggplot (subset (wavedata.vol50, ! is.na (AREA_i)), aes (x = YEAR, y = AREA_f, group = AREA_i, color = AREA_i, fill = AREA_i, size = volume, label = YEAR)) +
geom_vline (xintercept = fifty, color = "#a99171" , size = 1.5 ) +
geom_jitter (shape = 15 , alpha = .8 , width = 0 , height = 0.25 , show.legend = FALSE ) +
theme_soft () +
ylab ("Topic area" ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 )) +
scale_color_manual (values = infscale)
ggsave (eventscope, file = "eventscope.svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.5 )
ggplot (subset (wd250x, topic20 == "ENV" ), aes (x = as.numeric (year), y = 1 , label = year, size = log (volume))) +
geom_text (position = position_jitter (0.1 )) +
theme_soft () +
scale_x_continuous (limits = c (1780 , 2020 ), breaks = seq (1800 , 2000 , 50 ), minor_breaks = seq (1780 , 2020 , 10 ))
ggplot (subset (wd250x, topic20 == "NRG" ), aes (x = as.numeric (year), y = 1 , label = year, size = log (volume))) +
geom_text (position = position_jitter (0.1 )) +
theme_soft () +
scale_x_continuous (limits = c (1780 , 2020 ), breaks = seq (1800 , 2000 , 50 ), minor_breaks = seq (1780 , 2020 , 10 ))
ggplot (subset (wd250x, topic20 == "EPI" ), aes (x = as.numeric (year), y = 1 , label = year, size = log (volume))) +
geom_text (position = position_jitter (0.1 )) +
theme_soft () +
scale_x_continuous (limits = c (1780 , 2020 ), breaks = seq (1800 , 2000 , 50 ), minor_breaks = seq (1780 , 2020 , 10 ))
ggplot (subset (wd250x, topic20 == "DIS" ), aes (x = as.numeric (year), y = 1 , label = year, size = log (volume))) +
geom_text (position = position_jitter (0.1 )) +
theme_soft () +
scale_x_continuous (limits = c (1780 , 2020 ), breaks = seq (1800 , 2000 , 50 ), minor_breaks = seq (1780 , 2020 , 10 ))
ggplot (subset (wd250x, topic20 == "TRA" ), aes (x = as.numeric (year), y = 1 , label = year, size = log (volume))) +
geom_text (position = position_jitter (0.1 )) +
theme_soft () +
scale_x_continuous (limits = c (1780 , 2020 ), breaks = seq (1800 , 2000 , 50 ), minor_breaks = seq (1780 , 2020 , 10 ))
ggplot (subset (wd250x, topic20 == "ECO" ), aes (x = as.numeric (year), y = 1 , label = year, size = log (volume))) +
geom_text (position = position_jitter (0.1 )) +
theme_soft () +
scale_x_continuous (limits = c (1780 , 2020 ), breaks = seq (1800 , 2000 , 50 ), minor_breaks = seq (1780 , 2020 , 10 ))
ggplot (subset (wd250x, topic20 == "HEA" ), aes (x = as.numeric (year), y = 1 , label = year, size = log (volume))) +
geom_text (position = position_jitter (0.1 )) +
theme_soft () +
scale_x_continuous (limits = c (1780 , 2020 ), breaks = seq (1800 , 2000 , 50 ), minor_breaks = seq (1780 , 2020 , 10 ))
ggplot (subset (wd250x, topic20 %in% c ("SCI" , "EDU" )), aes (x = as.numeric (year), y = 1 , label = year, size = log (volume))) +
geom_text (position = position_jitter (0.1 )) +
theme_soft () +
scale_x_continuous (limits = c (1780 , 2020 ), breaks = seq (1800 , 2000 , 50 ), minor_breaks = seq (1780 , 2020 , 10 ))
ggplot (subset (wd250x, topic20 %in% c ("GEO" )), aes (x = as.numeric (year), y = 1 , label = year, size = log (volume))) +
geom_text (position = position_jitter (0.1 )) +
theme_soft () +
scale_x_continuous (limits = c (1780 , 2020 ), breaks = seq (1800 , 2000 , 50 ), minor_breaks = seq (1780 , 2020 , 10 ))
ggplot (subset (wd250x, topic20 %in% c ("LEI" )), aes (x = as.numeric (year), y = 1 , label = year, size = log (volume))) +
geom_text (position = position_jitter (0.1 )) +
theme_soft () +
scale_x_continuous (limits = c (1780 , 2020 ), breaks = seq (1800 , 2000 , 50 ), minor_breaks = seq (1780 , 2020 , 10 ))
eventscope.small <- ggplot (subset (wavedata.vol50, ! is.na (AREA_i) & ! (AREA_i %in% c ("Functional" , "Family" , "Society" , "Education" , "ScienceTech" , "Public" ))), aes (x = YEAR, y = AREA_f, group = AREA_i, color = AREA_i, fill = AREA_i, size = volume, label = YEAR)) +
geom_vline (xintercept = fifty, color = "lightskyblue2" , size = 1.5 ) +
geom_jitter (width = 0 , height = 0.25 , show.legend = FALSE ) +
theme_bluewhite () +
ylab ("Topic area" ) +
scale_x_continuous (breaks = seq (1800 , 2000 , 50 ))
ggsave (eventscope.small, file = "eventscope_small.svg" , dpi = 1200 , unit = "cm" , width = 4.2 , height = 2.1 , scale = 3 )
eventcount.lm <- lm (count ~ year, data = eventcount)
predict (eventcount.lm, newdata = list (year = c (1800 , 1850 , 1900 , 1950 , 2000 , 2050 )))
summary (lm (x ~ as.numeric (Group.1 ), data = x))
ggsave (eventcount.time, file = "eventcount+time.svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.5 )
firstyear <- tapply (wavedata.vol50$ YEAR, wavedata.vol50$ topic, min, na.rm = T)
topictrafo <- firstyear[order (firstyear)]
topic.trafo <- data.frame (original = names (topictrafo), modified = 1 : 120 )
wavedata.vol50$ topic.trafo <- factor (topic.trafo[match (wavedata.vol50$ topic, topic.trafo$ original), "modified" ])
datebreaks <- as.POSIXct (c (
"1780-01-01" , "1790-01-01" , "1800-01-01" , "1810-01-01" , "1820-01-01" , "1830-01-01" , "1840-01-01" , "1850-01-01" ,
"1860-01-01" , "1870-01-01" , "1880-01-01" , "1890-01-01" , "1900-01-01" ,
"1910-01-01" , "1920-01-01" , "1930-01-01" , "1940-01-01" , "1950-01-01" ,
"1960-01-01" , "1970-01-01" , "1980-01-01" , "1990-01-01" , "2000-01-01" ,
"2010-01-01" , "2020-01-01"
))
fifty2 <- as.POSIXct (c ("1800-01-01" , "1850-01-01" , "1900-01-01" , "1950-01-01" , "2000-01-01" ))
crisisplot <- ggplot (wavedata.vol50, aes (color = topic.trafo, fill = topic.trafo, xmin = as.POSIXct (start * 60 * 60 * 24 , origin = "0000-01-01" ), xmax = as.POSIXct ((start + duration * 10 ) * 60 * 60 * 24 , origin = "0000-01-01" ), ymin = as.numeric (topic.trafo), ymax = as.numeric (topic.trafo) + 2 * intensity)) +
geom_vline (xintercept = fifty2, size = 1.15 , color = "lightblue3" ) +
geom_rect (fill = "aliceblue" , color = "skyblue3" , size = 1.5 , aes (ymin = 0 , ymax = 110 , xmin = as.POSIXct ("1788-01-01" ), xmax = as.POSIXct ("2022-12-31" ))) +
geom_rect () +
scale_fill_viridis_d (na.value = "grey80" ) +
scale_color_viridis_d (na.value = "grey80" ) +
theme_bluewhite () +
ylim (0 , 110 ) +
ylab ("Topic ID" ) +
xlab ("Year" ) +
theme (legend.position = "none" ) +
scale_x_continuous (breaks = datebreaks, labels = seq (1780 , 2020 , 10 )) +
scale_y_continuous (breaks = seq (10 , 100 , 10 ), minor_breaks = seq (0 , 110 , 5 ))
ggsave (crisisplot, file = "crisisdensity+time.svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
years50 <- c (1800 , 1850 , 1900 , 1950 , 2000 , 2050 )
wavedata.vol50$ YEAR <- as.numeric (wavedata.vol50$ year)
wavedata.vol50$ TotalNewspaperVolume <- thetimes[match (wavedata.vol50$ year, thetimes$ year), "articles" ]
volume.lm <- (lm (volume ~ (YEAR), data = wavedata.vol50))
predict (volume.lm, newdata = list (YEAR = years50))
duration.lm <- (lm (duration ~ (YEAR), data = wavedata.vol50))
predict (duration.lm, newdata = list (YEAR = years50))
intensity.lm <- (lm (intensity ~ (YEAR), data = wavedata.vol50))
predict (intensity.lm, newdata = list (YEAR = years50))
max.intensity.lm <- (lm (max.intensity ~ (YEAR), data = wavedata.vol50))
predict (max.intensity.lm, newdata = list (YEAR = years50))
variability.lm <- (lm (variability ~ (YEAR), data = wavedata.vol50))
predict (variability.lm, newdata = list (YEAR = years50))
summary (lm (duration ~ I (as.numeric (year) - 1788 ), data = wavedata.vol50))
summary (lm (intensity_i ~ I (as.numeric (year) - 1788 ), data = wavedata.vol50))
summary (lm (max.intensity ~ I (as.numeric (year) - 1788 ), data = wavedata.vol50))
summary (lm (variability ~ I (as.numeric (year) - 1788 ), data = wavedata.vol50))
summary (lm ((variability / intensity) ~ I (as.numeric (year) - 1788 ), data = wavedata.vol50))
summary (lm (baseline365 ~ I (as.numeric (year) - 1788 ), data = wavedata.vol50))
wavedata.vol50$ intensity_i <- wavedata.vol50$ volume / wavedata.vol50$ duration
intensity.trajectory <- ggplot (wavedata.vol50, aes (x = as.numeric (year), y = intensity_i)) +
geom_jitter (size = 0.5 ) +
geom_smooth (linetype = "longdash" , fill = "#44aa66" , color = "#44aa66" , method = "lm" , na.rm = TRUE ) +
geom_smooth (fill = "#dd1c77" , color = "#dd1c77" ) +
theme_bluewhite () +
xlab ("Year" ) +
ylab ("Average daily intensity of coverage \n during the identified crisis event" ) +
scale_y_log10 (breaks = c (0.05 , 0.1 , 1 , 5 ), limits = c (0.05 , 5 )) +
annotation_logticks () +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 ), minor_breaks = (seq (1780 , 2020 , 2 )))
ggsave (intensity.trajectory, file = "intensity+time.svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
duration.trajectory <- ggplot (wavedata.vol50, aes (x = as.numeric (year), y = duration)) +
geom_jitter (size = 0.5 ) +
geom_smooth (linetype = "longdash" , fill = "#44aa66" , color = "#44aa66" , method = "lm" , na.rm = TRUE ) +
geom_smooth (fill = "#dd1c77" , color = "#dd1c77" ) +
theme_bluewhite () +
xlab ("Year" ) +
ylab ("Average duration of above-baseline coverage \n during the identified crisis event" ) +
ylim (0 , 160 ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 ), minor_breaks = (seq (1780 , 2020 , 2 )))
ggsave (duration.trajectory, file = "duration+time.svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
volume.trajectory <- ggplot (wavedata.vol50, aes (x = as.numeric (year), y = volume)) +
geom_jitter (size = 0.5 ) +
geom_smooth (linetype = "longdash" , fill = "#44aa66" , color = "#44aa66" , method = "lm" , na.rm = TRUE ) +
geom_smooth (fill = "#dd1c77" , color = "#dd1c77" ) +
theme_bluewhite () +
xlab ("Year" ) +
ylab ("Total volume of coverage \n during the identified crisis case" ) +
ylim (0 , 500 ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 ), minor_breaks = seq (1780 , 2020 , 2 )) +
scale_y_log10 (breaks = c (5 , 10 , 50 , 100 , 500 ), limits = c (5 , 500 )) +
annotation_logticks ()
volume.trajectory
ggsave (volume.trajectory, file = "volume+time.svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
volume.trajectory.small <- ggplot (wavedata.vol50, aes (x = as.numeric (year), y = volume)) +
geom_jitter (size = 0.5 ) +
geom_smooth (linetype = "longdash" , fill = "#44aa66" , color = "#44aa66" , method = "lm" , na.rm = TRUE ) +
geom_smooth (fill = "#dd1c77" , color = "#dd1c77" ) +
theme_bluewhite () +
xlab ("Year" ) +
ylab ("Total volume of coverage \n during crisis news waves" ) +
ylim (0 , 500 ) +
scale_x_continuous (breaks = seq (1800 , 2000 , 50 ), minor_breaks = seq (1775 , 2025 , 25 )) +
scale_y_log10 (breaks = c (5 , 10 , 50 , 100 , 500 ), limits = c (5 , 500 )) +
annotation_logticks ()
ggsave (volume.trajectory.small, file = "volume+time_small.svg" , dpi = 1200 , unit = "cm" , width = 4.2 , height = 2.1 , scale = 3.00 )
peak.trajectory <- ggplot (wavedata.vol50, aes (x = as.numeric (year), y = max.intensity)) +
geom_jitter (size = 0.5 ) +
geom_smooth (linetype = "longdash" , fill = "#44aa66" , color = "#44aa66" , method = "lm" , na.rm = TRUE ) +
geom_smooth (fill = "#dd1c77" , color = "#dd1c77" ) +
theme_bluewhite () +
xlab ("Year" ) +
ylab ("Maximum amount of coverage \n during the identified crisis case" ) +
scale_y_log10 (breaks = c (1 , 10 , 100 ), limits = c (0.1 , 100 )) +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 ), minor_breaks = seq (1780 , 2020 , 2 )) +
annotation_logticks ()
ggsave (peak.trajectory, file = "peak+time.svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
relative.variability.trajectory <- ggplot (wavedata.vol50, aes (x = as.numeric (year), y = variability / intensity)) +
geom_point () +
geom_smooth () +
theme_bluewhite ()
variability.trajectory <- ggplot (wavedata.vol50, aes (x = as.numeric (year), y = variability)) +
geom_point () +
geom_smooth () +
theme_bluewhite ()
### Total volume of coverage
x <- data.frame (table (wide.STM$ year))
newsvolume$ crisis <- x[match (newsvolume$ year, x$ Var1), "Freq" ]
newsvolume$ crisis.share <- newsvolume$ crisis / newsvolume$ articles
crisis.labelling <- ggplot (newsvolume, aes (x = year, y = 100 * crisis.share)) +
geom_vline (xintercept = fifty, size = 1.15 , color = "lightblue3" ) +
geom_point () +
geom_smooth (span = 0.25 , na.rm = TRUE , fill = "#dd1c77" , color = "#dd1c77" ) +
geom_smooth (na.rm = TRUE , method = "lm" , fill = "#44aa66" , color = "#44aa66" , ) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 ), minor_breaks = seq (1780 , 2020 , 2 )) +
xlab ("Year" ) +
ylab ("Share of total coverage with crisis labelling" )
topic.STM$ Total <- sum (topic.STM$ x)
topic.STM$ Share <- topic.STM$ x / topic.STM$ Total
topic.STM$ Topics <- factor (topic.STM$ Group.1 , levels = topic.STM$ Group.1 [order (topic.STM$ Share, decreasing = TRUE )], ordered = TRUE )
economic.topics <- ggplot (topic.STM, aes (y = 100 * Share, x = Topics, color = Topics, fill = Topics)) +
geom_col () +
geom_text (aes (label = 100 * round (Share, 3 ), y = 100 * Share + 1 ), color = "black" , size = 3 ) +
theme_bluewhite () +
theme (axis.text.x = element_text (angle = 45 , hjust = 1 )) +
ylab ("Share of topics in coverage with crisis labelling" ) +
scale_color_viridis_d () +
scale_fill_viridis_d () +
theme (legend.position = "none" )
topicareaplot2 <- function (d, area2) {
d <- d
d$ total <- tapply (d$ article, d$ decade, sum)[match (d$ decade, d$ decade[1 : 25 ])]
d$ percentage <- round (100 * d$ article / d$ total, 1 )
gg <- ggplot (d, aes (y = percentage, x = decade, fill = area)) +
geom_area (color = "white" ) +
geom_hline (yintercept = 0 , color = "#dd4422" ) +
theme_light () +
ggtitle (area2) +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 )) +
scale_fill_viridis_d ()
return (gg)
}
topicareaplot3 <- function (d, area2) {
d <- d
d$ total <- tapply (d$ article, d$ decade, sum)[match (d$ decade, d$ decade[1 : 25 ])]
d$ percentage <- round (100 * d$ article / d$ total, 1 )
gg <- ggplot (d, aes (y = percentage, x = decade, fill = area_ordered)) +
geom_area (color = "white" ) +
geom_hline (yintercept = 0 , color = "#dd4422" ) +
theme_light () +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 )) +
scale_fill_viridis_d ()
return (gg)
}
decade.STM_area2$ area_ordered <- factor (decade.STM_area2$ area, ordered = TRUE , levels = names (table (decade.STM_area2$ area))[order (tapply (decade.STM_area2$ articles, decade.STM_area2$ area, sum, na.rm = TRUE ), decreasing = FALSE )])
economic.topics.time.share <- topicareaplot3 (d = decade.STM_area2, area2 = names (table (decade.STM_area2$ area))) + geom_vline (xintercept = fifty, size = 1.15 , color = "lightblue3" ) + theme_bluewhite () + scale_fill_manual (values = c ("#f7fbff" , "#00441b" , "#deebf7" , "#006d2c" , "#c6dbef" , "#238b45" , "#9ecae1" , "#66a182" , "#00798c" , "#6baed6" , "#41ab5d" , "#4292c6" , "#74c476" , "#2171b5" , "#a1d99b" , "#08519c" , "#c7e9c0" , "#08306b" , "#e5f5e0" , "#111111" , "#8d96a3" , "red" )) + theme (legend.position = "right" ) + ylab ("Share of articles with crisis labelling" ) + guides (fill = guide_legend ("Topic area" ))
economic.topics.time.abs <- ggplot (data = decade.STM_area2, aes (y = articles, x = decade, fill = area_ordered)) +
geom_vline (xintercept = fifty, size = 1.15 , color = "lightblue3" ) +
geom_area () +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 )) +
theme_bluewhite () +
scale_fill_manual (values = c ("#555555" , "#565656" , "#666666" , "#676767" , "#777777" , "#787878" , "#888888" , "#898989" , "#999999" , "#9a9a9a" , "#aaaaaa" , "#ababab" , "#bbbbbb" , "#bcbcbc" , "#cccccc" , "#cdcdcd" , "#dddddd" , "#dedede" , "#eeeeee" , "#fefefe" , "#ffffff" , "red" )) +
theme (legend.position = c (0.2 , .525 )) +
ylab ("Count of articles with crisis labelling" ) +
guides (fill = guide_legend ("Topic area" ))
ecotoplist <- names (table (decade.STM_area$ area))[2 : 12 ]
withineconomic.topics.time.share <- topicareaplot2 (d = subset (decade.STM_area, area %in% ecotoplist), area2 = ecotoplist) + theme_bluewhite () + theme (legend.position = "right" ) + geom_vline (xintercept = fifty, size = 1.15 , color = "lightblue3" ) + ylab ("Share of economic crisis labelling" )
withineconomic.topics.time.abs <- ggplot (data = subset (decade.STM_area, area %in% ecotoplist), aes (y = articles, x = decade, fill = area)) +
geom_vline (xintercept = fifty, size = 1.15 , color = "lightblue3" ) +
geom_area () +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 )) +
theme_bluewhite () +
scale_fill_viridis_d () +
ylab ("Number of articles with crisis labelling" ) +
theme (legend.position = (c (0.15 , 0.5 ))) +
guides (fill = guide_legend ("Topic within Economy" ))
wavedata.vol50$ ECO <- ifelse (wavedata.vol50$ AREA2 %in% ecotoplist, "Economic Crisis Event" , "Non-Economic Crisis Event" )
eco.CE.decade <- data.frame (table (wavedata.vol50$ decade, wavedata.vol50$ ECO))
names (eco.CE.decade) <- c ("Decade" , "EventType" , "Count" )
eco.CE.decade$ rowmax <- as.numeric (as.character (Recode (eco.CE.decade$ EventType, "'Economic Crisis Event'=27;'Non-Economic Crisis Event'=98;else=NA" )))
eco.CE.decade$ Index <- 100 * eco.CE.decade$ Count / eco.CE.decade$ rowmax
eco.CE.decade$ AllEvents <- rep (subset (eco.CE.decade, EventType == "Economic Crisis Event" )$ Count + subset (eco.CE.decade, EventType == "Non-Economic Crisis Event" )$ Count, times = 2 )
eco.CE.time <- ggplot (data = subset (eco.CE.decade, EventType == "Economic Crisis Event" ), aes (x = as.numeric (as.character (Decade)), y = Count)) +
geom_vline (xintercept = fifty, color = "lightblue3" , size = 1.15 ) +
geom_point () +
geom_smooth (linetype = "longdash" , fill = "#44aa66" , color = "#44aa66" , method = "lm" , na.rm = TRUE ) +
geom_smooth (fill = "#dd1c77" , color = "#dd1c77" ) +
theme_bluewhite () +
xlab ("Decade" ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 )) +
ylab ("Economic Crisis Event Count" )
comp.CE.time.Index <- ggplot (data = eco.CE.decade, aes (x = as.numeric (as.character (Decade)), y = Index, color = EventType, fill = EventType, group = EventType)) +
geom_vline (xintercept = fifty, color = "lightblue3" , size = 1.15 ) +
geom_point () +
geom_smooth () +
theme_bluewhite () +
xlab ("Decade" ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 )) +
theme (legend.position = c (0.825 , 0.125 )) +
scale_color_viridis_d (begin = .2 , end = .8 ) +
scale_fill_viridis_d (begin = .2 , end = .8 ) +
ylab ("Index of Crisis Events (100=maximum)" )
comp.CE.time.abs <- ggplot (data = eco.CE.decade, aes (x = as.numeric (as.character (Decade)), y = Count, color = EventType, fill = EventType, group = EventType)) +
geom_vline (xintercept = fifty, color = "lightblue3" , size = 1.15 ) +
geom_point () +
geom_smooth () +
theme_bluewhite () +
xlab ("Decade" ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 )) +
theme (legend.position = c (0.8 , 0.8 )) +
scale_color_viridis_d (begin = .2 , end = .8 ) +
scale_fill_viridis_d (begin = .2 , end = .8 ) +
ylab ("Count of Crisis Events" )
comp.CE.time.col <- ggplot (data = eco.CE.decade, aes (x = as.numeric (as.character (Decade)), y = 100 * Count / AllEvents, color = EventType, fill = EventType, group = EventType)) +
geom_vline (xintercept = fifty, color = "lightblue3" , size = 1.15 ) +
geom_col () +
theme_bluewhite () +
xlab ("Decade" ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 )) +
theme (legend.position = c (0.2 , 0.2 )) +
scale_color_viridis_d (begin = .2 , end = .8 ) +
scale_fill_viridis_d (begin = .2 , end = .8 ) +
ylab ("Share of Crisis Events" )
economic.crisis.types.decades <- ggplot (data = subset (wavedata.vol50, AREA == "Economy" ), aes (x = decade, color = AREA2, fill = AREA2)) +
geom_vline (xintercept = fifty, color = "lightblue3" , size = 1.15 ) +
geom_bar (position = "fill" ) +
theme_bluewhite () +
xlab ("Decade" ) +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 )) +
scale_fill_viridis_d () +
scale_color_viridis_d ()
crisis.type.volume.trajectory <- ggplot (wavedata.vol50, aes (size = volume, x = YEAR, y = volume, color = ECO, fill = ECO, group = ECO)) +
geom_vline (xintercept = fifty, color = "lightblue3" , size = 1.15 ) +
geom_point () +
geom_smooth () +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 )) +
scale_color_viridis_d (option = "inferno" , begin = .2 , end = .8 ) +
scale_fill_viridis_d (option = "inferno" , begin = .2 , end = .8 ) +
theme (legend.position = "bottom" ) +
guides (size = "none" ) +
scale_y_log10 () +
annotation_logticks ()
crisis.type.duration.trajectory <- ggplot (wavedata.vol50, aes (size = duration, x = YEAR, y = duration, color = ECO, fill = ECO, group = ECO)) +
geom_vline (xintercept = fifty, color = "lightblue3" , size = 1.15 ) +
geom_point (alpha = .3 ) +
geom_smooth () +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 )) +
scale_color_viridis_d (option = "inferno" , begin = .2 , end = .8 ) +
scale_fill_viridis_d (option = "inferno" , begin = .2 , end = .8 ) +
theme (legend.position = "bottom" ) +
guides (size = "none" )
eco.CE.year <- data.frame (table (wavedata.vol50$ YEAR, wavedata.vol50$ ECO))
eco.CE.year2 <- data.frame (year = eco.CE.year[1 : 172 , 1 ], ECO = eco.CE.year[1 : 172 , 3 ], NonECO = eco.CE.year[173 : 344 , 3 ])
fifty <- c (1800 , 1850 , 1900 , 1950 , 2000 )
crisis.spillover <- ggplot (eco.CE.year2, aes (size = ECO, x = as.numeric (as.character (year)), y = NonECO, color = ECO, fill = ECO)) +
geom_vline (xintercept = fifty, color = "lightblue3" , size = 1.15 ) +
geom_point () +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 )) +
scale_color_viridis (option = "inferno" , begin = .2 , end = .8 ) +
scale_fill_viridis (option = "inferno" , begin = .2 , end = .8 ) +
theme (legend.position = c (0.15 , 0.7 )) +
guides (size = guide_legend ("Count of \n Economic \n Crisis Events" ), color = guide_legend ("Count of \n Economic \n Crisis Events" ), fill = guide_legend ("Count of \n Economic \n Crisis Events" )) +
xlab ("Year" ) +
ylab ("Count of Non-Economic Crisis Events" )
CE.volume <- aggregate (wavedata.vol50$ volume, by = list (wavedata.vol50$ YEAR), FUN = "sum" )
newsvolume$ CE.coverage <- CE.volume[match (newsvolume$ year, CE.volume$ Group.1 ), "x" ]
newsvolume$ CE.coverage <- replace (newsvolume$ CE.coverage, is.na (newsvolume$ CE.coverage), 0 )
newsvolume$ crisis <- replace (newsvolume$ crisis, is.na (newsvolume$ crisis), 0 )
newsvolume$ CEtoCL <- newsvolume$ CE.coverage / newsvolume$ crisis
CE_to_CL_trajectory <- ggplot (newsvolume, aes (x = year, y = 100 * CEtoCL)) +
geom_vline (xintercept = fifty, color = "lightblue3" , size = 1.15 ) +
geom_smooth (color = "deepskyblue4" , fill = "deepskyblue4" ) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 ), minor_breaks = seq (1780 , 2020 , 2 )) +
scale_y_continuous (breaks = seq (0 , 100 , 20 ), minor_breaks = seq (0 , 100 , 10 ), limits = c (0 , 100 )) +
ylab ("Share of crisis labelling coverage \n that is linked to a crisis event" )
ggplot (newsvolume, aes (x = year, y = 100 * CE.coverage / articles)) +
geom_point () +
geom_smooth ()
ggplot (newsvolume, aes (x = year, y = 100 * crisis / articles)) +
geom_point () +
geom_smooth ()
ggplot (newsvolume, aes (x = year, y = 100 * CE.coverage / crisis)) +
geom_point () +
geom_smooth ()
### What share of crisis labelling can be assined to specific crisis events?
ggsave (CE_to_CL_trajectory, file = "CE_to_CL_trajectory.svg" , device = "svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
### Distribution of topics in crisis labelling
ggsave (crisis.labelling, file = "CL_trejectory.svg" , device = "svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
### Distribution of topics in crisis labelling
ggsave (economic.topics, file = "eco_CL_share_cs.svg" , device = "svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
### How has the salience of economic topics with crisis labelling developed? Shares.
ggsave (economic.topics.time.share, file = "eco_CL_share.svg" , device = "svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
### How has the salience of economic topics with crisis labelling developed? Absolute numbers.
ggsave (economic.topics.time.abs, file = "eco_CL_abs.svg" , device = "svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
### How has the distribution of economic topics with crisis labelling developed? Shares.
ggsave (withineconomic.topics.time.share, file = "within_eco_CL_share.svg" , device = "svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
### How has the distribution of economic topics with crisis labelling developed? Absolute numbers.
ggsave (withineconomic.topics.time.abs, file = "within_eco_CL_abs.svg" , device = "svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
### How has the count of economic crisis events developed?
ggsave (eco.CE.time, file = "economic_CE_trajectory.svg" , device = "svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
### How has the count of economic (vs non-economic) crisis events developed?
ggsave (comp.CE.time.Index, file = "economic_vs_noneconomic_CE_trajectory_index.svg" , device = "svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
### How has the count of economic (vs non-economic) crisis events developed?
ggsave (comp.CE.time.abs, file = "economic_vs_noneconomic_CE_trajectory_abs.svg" , device = "svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
### How has the share of economic (vs non-economic) crisis events developed?
ggsave (comp.CE.time.col, file = "economic_vs_noneconomic_CE_trajectory.svg" , device = "svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
### How has the composition of economic crisis event topics developed?
ggsave (economic.crisis.types.decades, file = "within_economic_topics_trajectory.svg" , device = "svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
### How has the volume of economic and non-economic crisis events developed?
ggsave (crisis.type.volume.trajectory, file = "crisis_type_volume_trajectory.svg" , device = "svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
### How has the duration of economic and non-economic crisis events developed?
ggsave (crisis.type.duration.trajectory, file = "crisis_type_duration_trajectory.svg" , device = "svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
### Do economic and non-economic crises co-occur more frequently today?
ggsave (crisis.spillover, file = "crisis_cooccurrence.svg" , device = "svg" , dpi = 1200 , unit = "cm" , width = 16 , height = 8 , scale = 1.25 )
ggplot (epiq, aes (x = YEAR, y = volume, size = volume)) +
geom_point () +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 ))
max.h <- entropy (rep (1 / 120 , times = 120 ), method = "ML" )
decades <- seq (1780 , 2010 , 10 )
h <- data.frame (decades = decades, raw.entropy = NA , std.entropy = NA , max.entropy = max.h)
decade.STM$ total <- tapply (decade.STM$ article, decade.STM$ decade, sum, na.rm = TRUE )
decade.STM$ share <- decade.STM$ article / decade.STM$ total
for (i in 1 : length (decades))
{
h[i, "raw.entropy" ] <- entropy (subset (decade.STM, decade == decades[[i]])$ share, method = "ML" )
h[i, "std.entropy" ] <- h[i, "raw.entropy" ] / h[i, "max.entropy" ]
}
entropy_coverage <- ggplot (subset (h, decades != 2030 ), aes (x = decades, y = std.entropy)) +
geom_vline (xintercept = fifty, size = 1.15 , color = "lightblue3" ) +
geom_point () +
geom_smooth (method = "lm" , fill = "#44aa66" , color = "#44aa66" ) +
geom_smooth (fill = "#dd1c77" , color = "#dd1c77" , linetype = "longdash" ) +
geom_hline (yintercept = 1 , color = "red" ) +
theme_light () +
ylim (0.7 , 1 ) +
ylab ("Standardized Entropy" ) +
xlab ("Decade" ) +
theme_bluewhite () +
scale_x_continuous (breaks = seq (1780 , 2020 , 10 ))
ggsave (entropy_coverage, file = "topicentropy_decade.svg" , units = "cm" , width = 16 , height = 8 , dpi = 1200 , scale = 1.35 )
CE.Economy <- subset (wavedata.vol50, AREA == "Economy" | AREA == "Energy" )
CE.Disaster <- subset (wavedata.vol50, AREA == "Disaster" )
CE.Epidemic <- subset (wavedata.vol50, AREA == "Epidemic" | AREA == "Epidemics" | AREA == "Health" )
CE.Functional <- subset (wavedata.vol50, AREA == "Functional" )
CE.Geopolitical <- subset (wavedata.vol50, AREA == "Geopolitical" )
CE.Government <- subset (wavedata.vol50, AREA == "Government" )
CE.Justice <- subset (wavedata.vol50, AREA == "Justice" )
CE.Military <- subset (wavedata.vol50, AREA == "Military" )
CE.Transport <- subset (wavedata.vol50, AREA == "Transport" )
ggplot (subset (CE.Epidemic, volume > 50 ), aes (y = volume, x = year, size = volume)) +
geom_point (fill = "white" , shape = 1 ) +
scale_y_log10 ()
table (CE.Epidemic$ wordlist)[table (CE.Epidemic$ wordlist) > 0 ]
save (wavedata, file = "wavedata.RData" )
save (wavedata.vol50, file = "wavedata_vol50.RData" )
save (wave.keywords, file = "wave_keywords.RData" )
save (tss, file = "tss_total.RData" )
save (wd5, file = "wd5.RData" )
textdirectory1 <- NA
textdirectory2 <- NA
for (w in 1 : W)
{
text.to.output <- wave.keywords[[w]]$ texts[1 : 2 , ]
text1 <- paste (
(text.to.output[1 , 1 ]), " \n " ,
(text.to.output[1 , 2 ]), " \n " ,
paste (wave.keywords[[w]]$ wordlist, collapse = " " ), " \n " ,
wave.keywords[[w]]$ topic, " \n " ,
wave.keywords[[w]]$ area, " \n " ,
(textfiles.1 $ newspaper[which (textfiles.1 $ id == text.to.output[1 , 1 ])]), " \n " ,
(textfiles.1 $ date[which (textfiles.1 $ id == text.to.output[1 , 1 ])]), " \n " ,
(textfiles.1 $ headline[which (textfiles.1 $ id == text.to.output[1 , 1 ])]), " \n " ,
(textfiles.1 $ text[which (textfiles.1 $ id == text.to.output[1 , 1 ])])
)
filename1 <- paste0 ("et" , w, "---1" , ".txt" )
text2 <- paste (
(text.to.output[2 , 1 ]), " \n " ,
(text.to.output[2 , 2 ]), " \n " ,
paste (wave.keywords[[w]]$ wordlist, collapse = " " ), " \n " ,
wave.keywords[[w]]$ topic, " \n " ,
wave.keywords[[w]]$ area, " \n " ,
(textfiles.1 $ newspaper[which (textfiles.1 $ id == text.to.output[2 , 1 ])]), " \n " ,
(textfiles.1 $ date[which (textfiles.1 $ id == text.to.output[2 , 1 ])]), " \n " ,
(textfiles.1 $ headline[which (textfiles.1 $ id == text.to.output[2 , 1 ])]), " \n " ,
(textfiles.1 $ text[which (textfiles.1 $ id == text.to.output[2 , 1 ])])
)
filename2 <- paste0 ("et" , ifelse (w > 999 , "0" , ifelse (w > 99 , "00" , ifelse (w > 9 , "000" , "0000" ))), w, "-2" , ".txt" )
save.csv (text1, file = filename1)
textdirectory1[w] <- c (text1)
textdirectory2[w] <- c (text2)
print (paste0 (w, "/" , W))
flush.console ()
}
fileConn <- file ("EventText1.txt" )
writeLines (textdirectory1, fileConn)
close (fileConn)
fileConn <- file ("EventText2.txt" )
writeLines (textdirectory2, fileConn)
close (fileConn)
topic_diversity_data <- data.frame (year = NA , indicator = NA , categories = NA , corpus = NA , value = NA )
thetimes_L <- pivot_longer (thetimes3, cols = c ("active250" , "active50" , "active20" ), names_to = "categories" , values_to = "x" )
topic_diversity_data <- rbind (
topic_diversity_data,
data.frame (year = thetimes_L$ year, indicator = "active" , categories = thetimes_L$ categories, corpus = "crisis labelling" , value = thetimes_L$ x),
data.frame (year = thetimes.d3$ decade, indicator = "active" , categories = thetimes.d3$ name, corpus = "crisis news waves" , value = thetimes.d3$ value),
data.frame (year = df.active_L$ year, indicator = "active" , categories = df.active_L$ resolution, corpus = "routine" , value = df.active_L$ active),
data.frame (year = thetimes3$ year, indicator = "gini" , categories = thetimes3$ var, corpus = "crisis labelling" , value = 1 - thetimes3$ value),
data.frame (year = rep (thetimes$ year, times = 3 ), indicator = "gini" , categories = rep (c ("21 topic areas" , "163 topic complexes" , "250 topics" ), each = 236 ), corpus = "crisis news waves" , value = c (thetimes$ nw.rgini20r, thetimes$ nw.rgini50r, thetimes$ nw.rgini250r)),
data.frame (year = str_extract (df.gini_L$ year, pattern = "[:digit:]{4,4}" ), indicator = "gini" , categories = df.gini_L$ name, corpus = "routine" , value = df.gini_L$ value)
)
topic_diversity_data$ corp <- factor (topic_diversity_data$ corpus, ordered = TRUE , levels = c ("routine" , "crisis labelling" , "crisis news waves" ))
topic_diversity_data$ cat <- factor (Recode (topic_diversity_data$ categories, "'active20'='21 topic areas';'active50'='163 topic complexes';'active250'='250 topics';'nw20d'='21 topic areas';'nw50d'='163 topic complexes';'nw250d'='250 topics';'topic areas(20)'='21 topic areas';'topic complexes(50)'='163 topic complexes';'topic(250)'='250 topics';'topic20'='21 topic areas';'topic50'='163 topic complexes';'topic250'='250 topics'" ), ordered = TRUE , levels = c ("250 topics" , "163 topic complexes" , "21 topic areas" ))
top_div_dat <- subset (topic_diversity_data, ! is.na (value))
ggplot (subset (top_div_dat, cat == "21 topic areas" ), aes (y = value, x = as.Date (year, format = "%Y" ), linetype = corpus, shape = corpus, color = indicator)) +
geom_point () +
geom_smooth ()
ggplot (subset (top_div_dat, cat == "163 topic complexes" ), aes (y = value, x = as.Date (year, format = "%Y" ), linetype = corpus, shape = corpus, color = indicator)) +
geom_point () +
geom_smooth ()
gg_topic_diversity <- ggplot (top_div_dat, aes (y = value, x = as.Date (year, format = "%Y" ), linetype = corp, shape = corp, color = corp)) +
geom_point () +
geom_smooth (se = FALSE ) +
theme_bluewhite () +
scale_fill_viridis_d (option = "inferno" , begin = 0 , end = 0.85 ) +
scale_color_viridis_d (option = "inferno" , begin = 0 , end = 0.85 ) +
facet_grid (cat ~ indicator) +
xlab ("Year" ) +
ylab ("Share of active topics Diversity (1-Gini)" ) +
theme (legend.position = "bottom" )
ggsave (gg_topic_diversity, file = "gg_topic_diversity.svg" , scale = 1.25 , dpi = 1200 , unit = "cm" , width = 16 , height = 16 )
thetimes$ statistics <- social_statistics_timeline[match (thetimes$ year, social_statistics_timeline$ year), "indicators" ]
thetimes$ newspaper_circulation <- media_indicators[match (thetimes$ year, media_indicators$ year), "newspaper_circulation_i" ]
thetimes$ TV_households_share <- media_indicators[match (thetimes$ year, media_indicators$ year), "TV_households_share_i" ]
thetimes$ Internet_penetration <- media_indicators[match (thetimes$ year, media_indicators$ year), "Internet_penetration_i" ]
thetimes$ media_autonomy <- df_bes_year[match (thetimes$ year, df_bes_year$ year), "no_party_id" ]
thetimes$ media_autonomy_i <- na_locf (thetimes$ media_autonomy)
thetimes$ Newspaper_212 <- arima (thetimes$ newspaper_circulation, order = c (2 , 1 , 2 ))$ resid
thetimes$ TV_212 <- arima (thetimes$ TV_households_share, order = c (2 , 1 , 2 ))$ resid
thetimes$ Internet_212 <- arima (thetimes$ Internet_penetration, order = c (2 , 1 , 2 ))$ resid
thetimes$ ORG_212 <- arima (thetimes$ ccORG_per_article, order = c (2 , 1 , 2 ))$ resid
thetimes$ PERSON_212 <- arima (thetimes$ ccPERSON_per_article, order = c (2 , 1 , 2 ))$ resid
thetimes$ SpendingShare_212 <- arima (thetimes$ spending_GDP, order = c (2 , 1 , 2 ))$ resid
thetimes$ SpendingDiversity_212 <- arima (thetimes$ gini_rev, order = c (2 , 1 , 2 ))$ resid
thetimes$ Autonomy_212 <- arima (thetimes$ media_autonomy_i, order = c (2 , 1 , 2 ))$ resid
thetimes$ Statistics_212 <- arima (thetimes$ statistics, order = c (2 , 1 , 2 ))$ resid
thetimes$ CLSHARE_212 <- arima (thetimes$ cl.share, order = c (2 , 1 , 2 ))$ resid
thetimes$ Penetration <- rowMaxs (as.matrix (thetimes[, c ("newspaper_circulation" , "TV_households_share" , "Internet_penetration" )]))
thetimes$ Penetration_212 <- arima (thetimes$ Penetration, order = c (2 , 1 , 2 ))$ resid
thetimes$ Autonomy_bi <- 1 * (thetimes$ media_autonomy_i > 0.06 )
CLS_m6 <- (lm (CLSHARE_212 ~ Penetration_212 * Autonomy_212 + ORG_212 + PERSON_212 + SpendingShare_212 + SpendingDiversity_212 + Statistics_212 + Internet_212, data = thetimes))
CLS_m5 <- (lm (CLSHARE_212 ~ Penetration_212 * Autonomy_212 + ORG_212 + PERSON_212 + SpendingShare_212 + SpendingDiversity_212 + Statistics_212, data = thetimes))
CLS_m4 <- (lm (CLSHARE_212 ~ Penetration_212 * Autonomy_212 + SpendingShare_212 + SpendingDiversity_212 + Statistics_212, data = thetimes))
CLS_m3 <- (lm (CLSHARE_212 ~ Penetration_212 * Autonomy_212 + SpendingShare_212 + SpendingDiversity_212, data = thetimes))
CLS_m2b <- (lm (CLSHARE_212 ~ ORG_212 + PERSON_212, data = thetimes))
CLS_m2c <- (lm (CLSHARE_212 ~ Statistics_212, data = thetimes))
CLS_m2 <- (lm (CLSHARE_212 ~ (Penetration_212) * Autonomy_212, data = thetimes))
CLS_m1 <- (lm (CLSHARE_212 ~ SpendingShare_212 + SpendingDiversity_212, data = thetimes))
CLS_m0 <- (lm (CLSHARE_212 ~ 1 , data = thetimes))
CLS_m5b <- (lm (CLSHARE_212 ~ + ORG_212 + PERSON_212 + SpendingShare_212 + SpendingDiversity_212 + Statistics_212, data = thetimes))
thetimes$ gini_rev_i <- na_locf (thetimes$ gini_rev)
arima_CL1 <- Arima (thetimes$ cl.share, xreg = as.matrix (thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
arima_CL2 <- Arima (thetimes$ cl.share, xreg = as.matrix (thetimes[, c ("Penetration_212" , "Autonomy_212" , "Statistics_212" , "ORG_212" , "PERSON_212" , "SpendingShare_212" , "SpendingDiversity_212" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
lm_CL1 <- lm (cl.share ~ Penetration * media_autonomy_i + statistics + ccORG_per_article + ccPERSON_per_article + spending_GDP + gini_rev_i + year, data = thetimes)
lm_CL2 <- lm (cl.share ~ Penetration * media_autonomy_i + statistics + ccORG_per_article + ccPERSON_per_article + spending_GDP + gini_rev_i, data = subset (thetimes, year < 2000 ))
lm_CL3 <- lm (cl.share ~ Penetration * media_autonomy_i + statistics + ccORG_per_article + ccPERSON_per_article + spending_GDP + gini_rev_i, data = subset (thetimes, year > 1900 ))
lm_CL4 <- lm (cl.share ~ Penetration * media_autonomy_i + statistics + ccORG_per_article + ccPERSON_per_article + spending_GDP + gini_rev_i, data = subset (thetimes, year < 1900 ))
lm_CL5 <- lm (cl.share ~ Penetration * media_autonomy_i + statistics + ccORG_per_article + ccPERSON_per_article + spending_GDP + gini_rev_i, data = subset (thetimes, year < 1950 ))
lm_CL6 <- lm (cl.share ~ Penetration * media_autonomy_i + statistics + ccORG_per_article + ccPERSON_per_article + spending_GDP + gini_rev_i, data = subset (thetimes, year < 1975 ))
lm_CL7 <- lm (cl.share ~ Penetration * media_autonomy_i + statistics + ccORG_per_article + ccPERSON_per_article + spending_GDP + gini_rev_i, data = subset (thetimes, year < 1990 ))
lm_CL8 <- lm (cl.share ~ Penetration * media_autonomy_i + statistics + ccORG_per_article + ccPERSON_per_article + spending_GDP + gini_rev_i, data = thetimes)
# Prediction trained on the pre-2000 data (forecast for 2000-2020)
thetimes$ pred_1 <- stats:: predict (lm_CL2, newdata = thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev_i" )])
# Prediction trained on the post-1900 data (forecast for 1785-1900)
thetimes$ pred_2 <- stats:: predict (lm_CL3, newdata = thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev_i" )])
# Prediction trained on the pre-1900 data (forecast for 1900-2020)
thetimes$ pred_3 <- stats:: predict (lm_CL4, newdata = thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev_i" )])
# Prediction trained on the pre-1900 data (forecast for 1900-2020)
thetimes$ pred_4 <- stats:: predict (lm_CL5, newdata = thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev_i" )])
# Prediction trained on the pre-1900 data (forecast for 1900-2020)
thetimes$ pred_5 <- stats:: predict (lm_CL6, newdata = thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev_i" )])
# Prediction trained on the pre-1900 data (forecast for 1900-2020)
thetimes$ pred_6 <- stats:: predict (lm_CL7, newdata = thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev_i" )])
# Prediction trained on the pre-1900 data (forecast for 1900-2020)
thetimes$ pred_7 <- stats:: predict (lm_CL8, newdata = thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev_i" )])
ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_1))
ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_2))
ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_3))
lm_CL1X <- lm (cl.share ~ Penetration * media_autonomy_i + statistics + spending_GDP + gini_rev_i + year, data = thetimes)
lm_CL2X <- lm (cl.share ~ Penetration * media_autonomy_i + statistics + spending_GDP + gini_rev_i, data = subset (thetimes, year < 2000 ))
lm_CL3X <- lm (cl.share ~ Penetration * media_autonomy_i + statistics + spending_GDP + gini_rev_i, data = subset (thetimes, year > 1900 ))
lm_CL4X <- lm (cl.share ~ Penetration * media_autonomy_i + statistics + spending_GDP + gini_rev_i, data = subset (thetimes, year < 1900 ))
lm_CL5X <- lm (cl.share ~ Penetration * media_autonomy_i + statistics + spending_GDP + gini_rev_i, data = subset (thetimes, year < 1950 ))
lm_CL6X <- lm (cl.share ~ Penetration * media_autonomy_i + statistics + spending_GDP + gini_rev_i, data = subset (thetimes, year < 1975 ))
lm_CL7X <- lm (cl.share ~ Penetration * media_autonomy_i + statistics + spending_GDP + gini_rev_i, data = subset (thetimes, year < 1990 ))
lm_CL8X <- lm (cl.share ~ Penetration * media_autonomy_i + statistics + spending_GDP + gini_rev_i, data = thetimes)
# Prediction trained on the pre-2000 data (forecast for 2000-2020)
thetimes$ pred_1X <- stats:: predict (lm_CL2X, newdata = thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "spending_GDP" , "gini_rev_i" )])
# Prediction trained on the post-1900 data (forecast for 1785-1900)
thetimes$ pred_2X <- stats:: predict (lm_CL3X, newdata = thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "spending_GDP" , "gini_rev_i" )])
# Prediction trained on the pre-1900 data (forecast for 1900-2020)
thetimes$ pred_3X <- stats:: predict (lm_CL4X, newdata = thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "spending_GDP" , "gini_rev_i" )])
# Prediction trained on the pre-1950 data (forecast for 1950-2020)
thetimes$ pred_4X <- stats:: predict (lm_CL5X, newdata = thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "spending_GDP" , "gini_rev_i" )])
# Prediction trained on the pre-1975 data (forecast for 1975-2020)
thetimes$ pred_5X <- stats:: predict (lm_CL6X, newdata = thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "spending_GDP" , "gini_rev_i" )])
# Prediction trained on the pre-1990 data (forecast for 1990-2020)
thetimes$ pred_6X <- stats:: predict (lm_CL7X, newdata = thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "spending_GDP" , "gini_rev_i" )])
# Prediction trained on the full data (no forecast)
thetimes$ pred_7X <- stats:: predict (lm_CL8X, newdata = thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "spending_GDP" , "gini_rev_i" )])
ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_1X))
ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_2X))
ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_3X))
ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_4X))
ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_5X))
gg_1785_1990 <- ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_6X)) +
annotate ("rect" , xmin = 1990 , xmax = 2020 , ymin = 0 , ymax = 0.01 , fill = "darkblue" , alpha = .25 ) +
annotate ("rect" , xmin = 1785 , xmax = 1990 , ymin = 0 , ymax = 0.01 , fill = "darkred" , alpha = .25 ) +
theme_soft () +
ylab ("Crisis Labelling Salience" )
gg_1785_2000 <- gg_1785_1975 <- ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_1X)) +
annotate ("rect" , xmin = 2000 , xmax = 2020 , ymin = 0 , ymax = 0.01 , fill = "darkblue" , alpha = .25 ) +
annotate ("rect" , xmin = 1785 , xmax = 2000 , ymin = 0 , ymax = 0.01 , fill = "darkred" , alpha = .25 ) +
theme_soft () +
ylab ("Crisis Labelling Salience" )
gg_1900_2020 <- ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_2X)) +
annotate ("rect" , xmin = 1785 , xmax = 1900 , ymin = 0 , ymax = 0.01 , fill = "darkblue" , alpha = .25 ) +
annotate ("rect" , xmin = 1900 , xmax = 2020 , ymin = 0 , ymax = 0.01 , fill = "darkred" , alpha = .25 ) +
theme_soft () +
ylab ("Crisis Labelling Salience" )
gg_1785_1900 <- ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_3X)) +
annotate ("rect" , xmin = 1900 , xmax = 2020 , ymin = 0 , ymax = 0.01 , fill = "darkblue" , alpha = .25 ) +
annotate ("rect" , xmin = 1785 , xmax = 1900 , ymin = 0 , ymax = 0.01 , fill = "darkred" , alpha = .25 ) +
theme_soft () +
ylab ("Crisis Labelling Salience" )
gg_1785_1950 <- ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_4X)) +
annotate ("rect" , xmin = 1950 , xmax = 2020 , ymin = 0 , ymax = 0.01 , fill = "darkblue" , alpha = .25 ) +
annotate ("rect" , xmin = 1785 , xmax = 1950 , ymin = 0 , ymax = 0.01 , fill = "darkred" , alpha = .25 ) +
theme_soft () +
ylab ("Crisis Labelling Salience" )
gg_1785_1975 <- ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_5X)) +
annotate ("rect" , xmin = 1975 , xmax = 2020 , ymin = 0 , ymax = 0.20 , fill = "darkblue" , alpha = .25 ) +
annotate ("rect" , xmin = 1785 , xmax = 1975 , ymin = 0 , ymax = 0.20 , fill = "darkred" , alpha = .25 ) +
theme_soft () +
ylab ("Crisis Labelling Salience" )
gg_1785_2020 <- ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_7X)) +
annotate ("rect" , xmin = 1785 , xmax = 2020 , ymin = 0 , ymax = 0.01 , fill = "darkred" , alpha = .25 ) +
theme_soft () +
ylab ("Crisis Labelling Salience" )
ggX_1785_1990 <- ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_6X)) +
annotate ("rect" , xmin = 1990 , xmax = 2020 , ymin = 0 , ymax = 0.01 , fill = "darkblue" , alpha = .25 ) +
annotate ("rect" , xmin = 1785 , xmax = 1990 , ymin = 0 , ymax = 0.01 , fill = "darkred" , alpha = .25 ) +
theme_soft () +
ylab ("Crisis Labelling Salience" )
ggX_1785_2000 <- gg_1785_1975 <- ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_1X)) +
annotate ("rect" , xmin = 2000 , xmax = 2020 , ymin = 0 , ymax = 0.01 , fill = "darkblue" , alpha = .25 ) +
annotate ("rect" , xmin = 1785 , xmax = 2000 , ymin = 0 , ymax = 0.01 , fill = "darkred" , alpha = .25 ) +
theme_soft () +
ylab ("Crisis Labelling Salience" )
ggX_1900_2020 <- ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_2X)) +
annotate ("rect" , xmin = 1785 , xmax = 1900 , ymin = 0 , ymax = 0.01 , fill = "darkblue" , alpha = .25 ) +
annotate ("rect" , xmin = 1900 , xmax = 2020 , ymin = 0 , ymax = 0.01 , fill = "darkred" , alpha = .25 ) +
theme_soft () +
ylab ("Crisis Labelling Salience" )
ggX_1785_1900 <- ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_3X)) +
annotate ("rect" , xmin = 1900 , xmax = 2020 , ymin = 0 , ymax = 0.01 , fill = "darkblue" , alpha = .25 ) +
annotate ("rect" , xmin = 1785 , xmax = 1900 , ymin = 0 , ymax = 0.01 , fill = "darkred" , alpha = .25 ) +
theme_soft () +
ylab ("Crisis Labelling Salience" )
ggX_1785_1950 <- ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_4X)) +
annotate ("rect" , xmin = 1950 , xmax = 2020 , ymin = 0 , ymax = 0.01 , fill = "darkblue" , alpha = .25 ) +
annotate ("rect" , xmin = 1785 , xmax = 1950 , ymin = 0 , ymax = 0.01 , fill = "darkred" , alpha = .25 ) +
theme_soft () +
ylab ("Crisis Labelling Salience" )
ggX_1785_1975 <- ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_5X)) +
annotate ("rect" , xmin = 1975 , xmax = 2020 , ymin = 0 , ymax = 0.20 , fill = "darkblue" , alpha = .25 ) +
annotate ("rect" , xmin = 1785 , xmax = 1975 , ymin = 0 , ymax = 0.20 , fill = "darkred" , alpha = .25 ) +
theme_soft () +
ylab ("Crisis Labelling Salience" )
ggX_1785_2020 <- ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_7X)) +
annotate ("rect" , xmin = 1785 , xmax = 2020 , ymin = 0 , ymax = 0.01 , fill = "darkred" , alpha = .25 ) +
theme_soft () +
ylab ("Crisis Labelling Salience" )
prediction_long <- pivot_longer (thetimes, col = c ("pred_1" , "pred_2" , "pred_3" , "pred_4" , "pred_5" , "pred_6" , "pred_7" , "pred_1X" , "pred_2X" , "pred_3X" , "pred_4X" , "pred_5X" , "pred_6X" , "pred_7X" ))
prediction_long$ train_start <- car:: Recode (prediction_long$ name, "'pred_1'=1785;'pred_1X'=1785;'pred_2'=1900;'pred_2X'=1900;'pred_3'=1785;'pred_3X'=1785;'pred_4'=1785;'pred_4X'=1785;'pred_5'=1785;'pred_5X'=1785;'pred_6'=1785;'pred_6X'=1785;'pred_7'=1785;'pred_7X'=1785" )
prediction_long$ train_end <- car:: Recode (prediction_long$ name, "'pred_1'=2000;'pred_1X'=2000;'pred_2'=2020;'pred_2X'=2020;'pred_3'=1900;'pred_3X'=1900;'pred_4'=1950;'pred_4X'=1950;'pred_5'=1975;'pred_5X'=1975;'pred_6'=1990;'pred_6X'=1990;'pred_7'=2020;'pred_7X'=2020" )
prediction_long$ train_end <- ifelse (prediction_long$ year == 2020 , prediction_long$ train_end, NA )
prediction_long$ predict_start <- car:: Recode (prediction_long$ name, "'pred_1'=2000;'pred_1X'=2000;'pred_2'=1785;'pred_2X'=1785;'pred_3'=1900;'pred_3X'=1900;'pred_4'=1950;'pred_4X'=1950;'pred_5'=1975;'pred_5X'=1975;'pred_6'=1990;'pred_6X'=1990;'pred_7'=2020;'pred_7X'=2020" )
prediction_long$ predict_end <- car:: Recode (prediction_long$ name, "'pred_1'=2020;'pred_1X'=2020;'pred_2'=1900;'pred_2X'=1900;'pred_3'=2020;'pred_3X'=2020;'pred_4'=2020;'pred_4X'=2020;'pred_5'=2020;'pred_5X'=2020;'pred_6'=2020;'pred_6X'=2020;'pred_7'=2020;'pred_7X'=2020" )
prediction_long$ predict_end <- ifelse (prediction_long$ year == 2020 , prediction_long$ predict_end, NA )
prediction_long$ train <- car:: Recode (prediction_long$ name, "'pred_1'='1785-2000';'pred_1X'='1785-2000';'pred_2'='1900-2020';'pred_2X'='1900-2020';'pred_3'='1785-1900';'pred_3X'='1785-1900';'pred_4'='1785-1950';'pred_4X'='1785-1950';'pred_5'='1785-1975';'pred_5X'='1785-1975';'pred_6'='1785-1990';'pred_6X'='1785-1990';'pred_7'='1785-2020';'pred_7X'='1785-2020'" )
prediction_long$ set <- car:: Recode (prediction_long$ name, "'pred_1'='with_ORG';'pred_1X'='without_ORG';'pred_2'='with_ORG';'pred_2X'='without_ORG';'pred_3'='with_ORG';'pred_3X'='without_ORG';'pred_4'='with_ORG';'pred_4X'='without_ORG';'pred_5'='with_ORG';'pred_5X'='without_ORG';'pred_6'='with_ORG';'pred_6X'='without_ORG';'pred_7'='with_ORG';'pred_7X'='without_ORG'" )
prediction_long$ range <- car:: Recode (prediction_long$ name, "'pred_1'=0.01;'pred_1X'=0.01;'pred_2'=0.01;'pred_2X'=0.01;'pred_3'=0.01;'pred_3X'=0.01;'pred_4'=0.01;'pred_4X'=0.01;'pred_5'=0.2;'pred_5X'=0.2;'pred_6'=0.01;'pred_6X'=0.01;'pred_7'=0.01;'pred_7X'=0.01" )
ggplot (thetimes, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = pred_5X)) +
annotate ("rect" , xmin = 1975 , xmax = 2020 , ymin = 0 , ymax = 0.20 , fill = "darkblue" , alpha = .25 ) +
annotate ("rect" , xmin = 1785 , xmax = 1975 , ymin = 0 , ymax = 0.20 , fill = "darkred" , alpha = .25 ) +
theme_soft () +
ylab ("Crisis Labelling Salience" )
gg_calibration <- ggplot (prediction_long, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = value)) +
geom_rect (aes (xmin = train_start, xmax = train_end, ymax = range), ymin = 0 , fill = "darkblue" , alpha = .2 ) +
geom_rect (aes (xmin = predict_start, xmax = predict_end, ymax = range), ymin = 0 , fill = "darkred" , alpha = .2 ) +
facet_grid (train ~ set, scale = "free_y" ) +
theme_soft () +
ylab ("Crisis Labelling Salience" )
ggplot (subset (prediction_long, name == "pred_1" ), aes (x = year)) +
geom_rect (aes (xmin = train_start, xmax = train_end), ymin = 0 , ymax = 0.025 , fill = "darkblue" , alpha = .25 / 236 ) +
geom_rect (aes (xmin = predict_start, xmax = predict_end), ymin = 0 , ymax = 0.025 , fill = "darkred" , alpha = .25 / 236 ) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = value)) +
facet_grid (train ~ set, scale = "free_y" ) +
theme_soft () +
ylab ("Crisis Labelling Salience" )
gg_calibration <- ggplot (prediction_long, aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = value)) +
geom_rect (aes (xmin = train_start, xmax = train_end, ymax = range), ymin = 0 , fill = "darkblue" , alpha = .25 ) +
geom_rect (aes (xmin = predict_start, xmax = predict_end, ymax = range), ymin = 0 , fill = "darkred" , alpha = .25 ) +
facet_grid (train ~ set, scale = "free_y" ) +
theme_soft () +
ylab ("Crisis Labelling Salience" )
ggsave (file = "gg_calibration.svg" , device = "svg" , gg_calibration, unit = "cm" , width = 12 , height = 16 , scale = 1.25 , dpi = 1200 )
stargazer (lm_CL1, lm_CL2, lm_CL7, lm_CL6, lm_CL5, lm_CL4, lm_CL3, type = "text" , column.labels = c ("1785-2020" , "1785-2000" , "1785-1990" , "1785-1975" , "1785-1950" , "1785-1900" , "1900-2020" ))
stargazer (lm_CL1X, lm_CL2X, lm_CL7X, lm_CL6X, lm_CL5X, lm_CL4X, lm_CL3X, type = "text" , column.labels = c ("1785-2020" , "1785-2000" , "1785-1990" , "1785-1975" , "1785-1950" , "1785-1900" , "1900-2020" ))
with (subset (thetimes, year >= 2000 ), cor.test (pred_1, cl.share))$ estimate^ 2
with (subset (thetimes, year >= 2000 ), cor.test (pred_1X, cl.share))$ estimate^ 2
with (subset (thetimes, year <= 1900 ), cor.test (pred_2, cl.share))$ estimate^ 2
with (subset (thetimes, year <= 1900 ), cor.test (pred_2X, cl.share))$ estimate^ 2
with (subset (thetimes, year >= 1900 ), cor.test (pred_3, cl.share))$ estimate^ 2
with (subset (thetimes, year >= 1900 ), cor.test (pred_3X, cl.share))$ estimate^ 2
with (subset (thetimes, year >= 1950 ), cor.test (pred_4, cl.share))$ estimate^ 2
with (subset (thetimes, year >= 1950 ), cor.test (pred_4X, cl.share))$ estimate^ 2
with (subset (thetimes, year >= 1975 ), cor.test (pred_5, cl.share))$ estimate^ 2
with (subset (thetimes, year >= 1975 ), cor.test (pred_5X, cl.share))$ estimate^ 2
with (subset (thetimes, year >= 1990 ), cor.test (pred_6, cl.share))$ estimate^ 2
with (subset (thetimes, year >= 1990 ), cor.test (pred_6X, cl.share))$ estimate^ 2
with (thetimes, cor.test (pred_7, cl.share))
with (thetimes, cor.test (pred_7X, cl.share))
grid.arrange (gg_1900_2020, gg_1785_1900, gg_1785_1950, gg_1785_1975, gg_1785_1990, gg_1785_2000, gg_1785_2020, ggX_1900_2020, ggX_1785_1900, ggX_1785_1950, ggX_1785_1975, ggX_1785_1990, ggX_1785_2000, ggX_1785_2020, ncol = 2 , as.table = FALSE )
library (stargazer)
stargazer (CLS_m0, CLS_m1, CLS_m2b, CLS_m2, CLS_m3, CLS_m4, CLS_m5, CLS_m6, type = "text" )
summary (lm (cl.share ~ newspaper_circulation + TV_households_share + Internet_penetration + statistics + (ccPERSON_index) + (ccORG_index) + spending_GDP + gini_rev + year, data = thetimes))
arima_CL.1900 _2020 <- Arima (subset (thetimes, year > 1900 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year > 1900 )[, c ("Penetration" , "media_autonomy_i" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev_i" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
arima_CL.1785 _1900 <- Arima (subset (thetimes, year < 1900 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year < 1900 )[, c ("Penetration" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev_i" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
arima_CL.1785 _1950 <- Arima (subset (thetimes, year < 1950 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year < 1950 )[, c ("Penetration" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev_i" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
arima_CL.1785 _1975 <- Arima (subset (thetimes, year < 1975 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year < 1975 )[, c ("Penetration" , "media_autonomy_i" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev_i" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
arima_CL.1785 _1990 <- Arima (subset (thetimes, year < 1990 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year < 1990 )[, c ("Penetration" , "media_autonomy_i" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev_i" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
arima_CL.1785 _2000 <- Arima (subset (thetimes, year < 2000 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year < 2000 )[, c ("Penetration" , "media_autonomy_i" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev_i" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
arima_CL.1785 _2020 <- Arima (subset (thetimes, year < 2021 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year < 2021 )[, c ("Penetration" , "media_autonomy_i" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev_i" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
dummat <- matrix (rnorm (n = 236 , mean = 0 , sd = 1 ))
colnames (dummat) <- "dummy"
thetimes$ dummy <- rnorm (n = 236 , mean = 0 , sd = 1 )
thetimes$ dummy2 <- rnorm (n = 236 , mean = 0 , sd = 1 )
e_arima_CL.1900 _2020 <- Arima (subset (thetimes, year > 1900 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year > 1900 )[, c ("dummy" , "dummy2" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
e_arima_CL.1785 _1900 <- Arima (subset (thetimes, year < 1900 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year < 1900 )[, c ("dummy" , "dummy2" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
e_arima_CL.1785 _1950 <- Arima (subset (thetimes, year < 1950 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year < 1950 )[, c ("dummy" , "dummy2" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
e_arima_CL.1785 _1975 <- Arima (subset (thetimes, year < 1975 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year < 1975 )[, c ("dummy" , "dummy2" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
e_arima_CL.1785 _1990 <- Arima (subset (thetimes, year < 1990 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year < 1990 )[, c ("dummy" , "dummy2" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
e_arima_CL.1785 _2000 <- Arima (subset (thetimes, year < 2000 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year < 2000 )[, c ("dummy" , "dummy2" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
e_arima_CL.1785 _2020 <- Arima (subset (thetimes, year < 2021 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year < 2021 )[, c ("dummy" , "dummy2" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
p_arima_CL.1900 _2020 <- Arima (subset (thetimes, year > 1900 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year > 1900 )[, c ("Penetration" , "media_autonomy_i" , "statistics" , "spending_GDP" , "gini_rev_i" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
p_arima_CL.1785 _1900 <- Arima (subset (thetimes, year < 1900 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year < 1900 )[, c ("Penetration" , "statistics" , "spending_GDP" , "gini_rev_i" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
p_arima_CL.1785 _1950 <- Arima (subset (thetimes, year < 1950 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year < 1950 )[, c ("Penetration" , "statistics" , "spending_GDP" , "gini_rev_i" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
p_arima_CL.1785 _1975 <- Arima (subset (thetimes, year < 1975 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year < 1975 )[, c ("Penetration" , "media_autonomy_i" , "statistics" , "spending_GDP" , "gini_rev_i" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
p_arima_CL.1785 _1990 <- Arima (subset (thetimes, year < 1990 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year < 1990 )[, c ("Penetration" , "media_autonomy_i" , "statistics" , "spending_GDP" , "gini_rev_i" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
p_arima_CL.1785 _2000 <- Arima (subset (thetimes, year < 2000 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year < 2000 )[, c ("Penetration" , "media_autonomy_i" , "statistics" , "spending_GDP" , "gini_rev_i" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
p_arima_CL.1785 _2020 <- Arima (subset (thetimes, year < 2021 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year < 2021 )[, c ("Penetration" , "media_autonomy_i" , "statistics" , "spending_GDP" , "gini_rev_i" )]), order = c (2 , 1 , 2 ), include.drift = TRUE )
aarima_CL.1785 _2020 <- auto.arima (subset (thetimes, year < 2021 )[, "cl.share" ], xreg = as.matrix (subset (thetimes, year < 2021 )[, c ("Penetration" , "media_autonomy_i" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev" )]), include.drift = TRUE )
auto.arima (lm_CL1$ resid)
df_fc_1785_1900 <- data.frame (forecast (arima_CL.1785 _1900, thetimes[, "cl.share" ], xreg = as.matrix (thetimes[, c ("Penetration" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev_i" )])))
df_fc_1785_1950 <- data.frame (forecast (arima_CL.1785 _1950, thetimes[, "cl.share" ], xreg = as.matrix (thetimes[, c ("Penetration" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev_i" )])))
df_fc_1785_1975 <- data.frame (forecast (arima_CL.1785 _1975, thetimes[, "cl.share" ], xreg = as.matrix (thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev_i" )])))
df_fc_1785_1990 <- data.frame (forecast (arima_CL.1785 _1990, thetimes[, "cl.share" ], xreg = as.matrix (thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev_i" )])))
df_fc_1785_2000 <- data.frame (forecast (arima_CL.1785 _2000, thetimes[, "cl.share" ], xreg = as.matrix (thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev_i" )])))
df_fc_1785_2020 <- data.frame (forecast (arima_CL.1785 _2020, thetimes[, "cl.share" ], xreg = as.matrix (thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev_i" )])))
df_fc_1900_2020 <- data.frame (forecast (arima_CL.1900 _2020, thetimes[, "cl.share" ], xreg = as.matrix (thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "ccORG_per_article" , "ccPERSON_per_article" , "spending_GDP" , "gini_rev_i" )])))
p.df_fc_1785_1900 <- data.frame (forecast (p_arima_CL.1785 _1900, thetimes[, "cl.share" ], xreg = as.matrix (thetimes[, c ("Penetration" , "statistics" , "spending_GDP" , "gini_rev_i" )])))
p.df_fc_1785_1950 <- data.frame (forecast (p_arima_CL.1785 _1950, thetimes[, "cl.share" ], xreg = as.matrix (thetimes[, c ("Penetration" , "statistics" , "spending_GDP" , "gini_rev_i" )])))
p.df_fc_1785_1975 <- data.frame (forecast (p_arima_CL.1785 _1975, thetimes[, "cl.share" ], xreg = as.matrix (thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "spending_GDP" , "gini_rev_i" )])))
p.df_fc_1785_1990 <- data.frame (forecast (p_arima_CL.1785 _1990, thetimes[, "cl.share" ], xreg = as.matrix (thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "spending_GDP" , "gini_rev_i" )])))
p.df_fc_1785_2000 <- data.frame (forecast (p_arima_CL.1785 _2000, thetimes[, "cl.share" ], xreg = as.matrix (thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "spending_GDP" , "gini_rev_i" )])))
p.df_fc_1785_2020 <- data.frame (forecast (p_arima_CL.1785 _2020, thetimes[, "cl.share" ], xreg = as.matrix (thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "spending_GDP" , "gini_rev_i" )])))
p.df_fc_1900_2020 <- data.frame (forecast (p_arima_CL.1900 _2020, thetimes[, "cl.share" ], xreg = as.matrix (thetimes[, c ("Penetration" , "media_autonomy_i" , "statistics" , "spending_GDP" , "gini_rev_i" )])))
e.df_fc_1785_1900 <- data.frame (forecast (e_arima_CL.1785 _1900, thetimes[, "cl.share" ], xreg = as.matrix (thetimes[, c ("dummy" , "dummy2" )])))
e.df_fc_1785_1950 <- data.frame (forecast (e_arima_CL.1785 _1950, thetimes[, "cl.share" ], xreg = as.matrix (thetimes[, c ("dummy" , "dummy2" )])))
e.df_fc_1785_1975 <- data.frame (forecast (e_arima_CL.1785 _1975, thetimes[, "cl.share" ], xreg = as.matrix (thetimes[, c ("dummy" , "dummy2" )])))
e.df_fc_1785_1990 <- data.frame (forecast (e_arima_CL.1785 _1990, thetimes[, "cl.share" ], xreg = as.matrix (thetimes[, c ("dummy" , "dummy2" )])))
e.df_fc_1785_2000 <- data.frame (forecast (e_arima_CL.1785 _2000, thetimes[, "cl.share" ], xreg = as.matrix (thetimes[, c ("dummy" , "dummy2" )])))
e.df_fc_1785_2020 <- data.frame (forecast (e_arima_CL.1785 _2020, thetimes[, "cl.share" ], xreg = as.matrix (thetimes[, c ("dummy" , "dummy2" )])))
e.df_fc_1900_2020 <- data.frame (forecast (e_arima_CL.1900 _2020, thetimes[, "cl.share" ], xreg = as.matrix (thetimes[, c ("dummy" , "dummy2" )])))
thetimes$ afc.1785 _1900 <- (df_fc_1785_1900$ Point.Forecast)
thetimes$ afc.1785 _1950 <- (df_fc_1785_1950$ Point.Forecast)
thetimes$ afc.1785 _1975 <- (df_fc_1785_1975$ Point.Forecast)
thetimes$ afc.1785 _1990 <- (df_fc_1785_1990$ Point.Forecast)
thetimes$ afc.1785 _2000 <- (df_fc_1785_2000$ Point.Forecast)
thetimes$ afc.1785 _2020 <- (df_fc_1785_2020$ Point.Forecast)
thetimes$ afc.1900 _2020 <- (df_fc_1900_2020$ Point.Forecast)
thetimes$ e_afc.1785 _1900 <- (e.df_fc_1785_1900$ Point.Forecast)
thetimes$ e_afc.1785 _1950 <- (e.df_fc_1785_1950$ Point.Forecast)
thetimes$ e_afc.1785 _1975 <- (e.df_fc_1785_1975$ Point.Forecast)
thetimes$ e_afc.1785 _1990 <- (e.df_fc_1785_1990$ Point.Forecast)
thetimes$ e_afc.1785 _2000 <- (e.df_fc_1785_2000$ Point.Forecast)
thetimes$ e_afc.1785 _2020 <- (e.df_fc_1785_2020$ Point.Forecast)
thetimes$ e_afc.1900 _2020 <- (e.df_fc_1900_2020$ Point.Forecast)
thetimes$ p_afc.1785 _1900 <- (p.df_fc_1785_1900$ Point.Forecast)
thetimes$ p_afc.1785 _1950 <- (p.df_fc_1785_1950$ Point.Forecast)
thetimes$ p_afc.1785 _1975 <- (p.df_fc_1785_1975$ Point.Forecast)
thetimes$ p_afc.1785 _1990 <- (p.df_fc_1785_1990$ Point.Forecast)
thetimes$ p_afc.1785 _2000 <- (p.df_fc_1785_2000$ Point.Forecast)
thetimes$ p_afc.1785 _2020 <- (p.df_fc_1785_2020$ Point.Forecast)
thetimes$ p_afc.1900 _2020 <- (p.df_fc_1900_2020$ Point.Forecast)
arimafc_long <- pivot_longer (thetimes, col = c ("afc.1785_1900" , "afc.1785_1950" , "afc.1785_1975" , "afc.1785_1990" , "afc.1785_2000" , "afc.1785_2020" , "afc.1900_2020" ))
e_arimafc_long <- pivot_longer (thetimes, col = c ("e_afc.1785_1900" , "e_afc.1785_1950" , "e_afc.1785_1975" , "e_afc.1785_1990" , "e_afc.1785_2000" , "e_afc.1785_2020" , "e_afc.1900_2020" ))
p_arimafc_long <- pivot_longer (thetimes, col = c ("p_afc.1785_1900" , "p_afc.1785_1950" , "p_afc.1785_1975" , "p_afc.1785_1990" , "p_afc.1785_2000" , "p_afc.1785_2020" , "p_afc.1900_2020" ))
ggplot (df_fc_1785_1900, aes (y = Point.Forecast, x = 116 : 351 , ymin = Lo.95 , ymax = Hi.95 )) +
geom_ribbon (alpha = .25 ) +
geom_line ()
arimafc_long$ value_i <- ifelse (arimafc_long$ name == "afc.1900_2020" , arimafc_long$ value - 0.02750686 , arimafc_long$ value)
ggplot (arimafc_longX, aes (x = year)) +
geom_point (aes (y = value), color = "red" , shape = 15 ) +
geom_point (aes (y = cl.share), color = "black" , shape = 18 ) +
facet_grid (name ~ .) +
theme_soft ()
arimafc_longX <- arimafc_long[order (arimafc_long$ name, arimafc_long$ year), ]
p_arimafc_longX <- p_arimafc_long[order (p_arimafc_long$ name, p_arimafc_long$ year), ]
e_arimafc_longX <- e_arimafc_long[order (e_arimafc_long$ name, e_arimafc_long$ year), ]
pmeans <- tapply (p_arimafc_longX$ value, p_arimafc_longX$ name, mean, na.rm = TRUE ) - tapply (p_arimafc_longX$ cl.share, p_arimafc_longX$ name, mean, na.rm = TRUE )
emeans <- tapply (e_arimafc_longX$ value, e_arimafc_longX$ name, mean, na.rm = TRUE ) - tapply (e_arimafc_longX$ cl.share, e_arimafc_longX$ name, mean, na.rm = TRUE )
xmeans <- tapply (arimafc_longX$ value, arimafc_longX$ name, mean, na.rm = TRUE ) - tapply (arimafc_longX$ cl.share, arimafc_longX$ name, mean, na.rm = TRUE )
p_arimafc_longX$ avg <- as.numeric (pmeans[match (p_arimafc_longX$ name, names (pmeans))])
e_arimafc_longX$ avg <- as.numeric (pmeans[match (e_arimafc_longX$ name, names (emeans))])
arimafc_longX$ avg <- as.numeric (pmeans[match (arimafc_longX$ name, names (xmeans))])
p_arimafc_longX$ value_i <- p_arimafc_longX$ value - p_arimafc_longX$ avg
e_arimafc_longX$ value_i <- e_arimafc_longX$ value - e_arimafc_longX$ avg
arimafc_longX$ value_i <- arimafc_longX$ value - arimafc_longX$ avg
prediction_longX <- data.frame (
value = c (prediction_long$ value, arimafc_longX$ value_i, p_arimafc_longX$ value, e_arimafc_longX$ value),
value_i = c (prediction_long$ value, arimafc_longX$ value_i, p_arimafc_longX$ value_i, e_arimafc_longX$ value_i),
year = c (prediction_long$ year, arimafc_longX$ year, p_arimafc_longX$ year, e_arimafc_longX$ year),
cl.share = c (prediction_long$ cl.share, arimafc_longX$ cl.share, p_arimafc_longX$ cl.share, e_arimafc_longX$ cl.share),
train_start = c (prediction_long$ train_start, as.numeric (str_extract (arimafc_longX$ name, "[:digit:]{4,4}" )), as.numeric (str_extract (p_arimafc_longX$ name, "[:digit:]{4,4}" )), as.numeric (str_extract (e_arimafc_longX$ name, "[:digit:]{4,4}" ))),
train_end = c (prediction_long$ train_end, as.numeric (str_extract (arimafc_longX$ name, "[:digit:]{4,4}$" )), as.numeric (str_extract (p_arimafc_longX$ name, "[:digit:]{4,4}$" )), as.numeric (str_extract (e_arimafc_longX$ name, "[:digit:]{4,4}$" ))),
predict_start = c (prediction_long$ predict_start, as.numeric (str_extract (arimafc_longX$ name, "[:digit:]{4,4}$" )), as.numeric (str_extract (p_arimafc_longX$ name, "[:digit:]{4,4}$" )), as.numeric (str_extract (e_arimafc_longX$ name, "[:digit:]{4,4}$" ))),
predict_end = c (prediction_long$ predict_end, rep (c (2020 ), times = c (1180 + 236 )), rep (c (1900 ), times = c (236 )), rep (c (2020 ), times = c (1180 + 236 )), rep (c (1900 ), times = c (236 )), rep (c (2020 ), times = c (1180 + 236 )), rep (c (1900 ), times = c (236 ))),
set = c (prediction_long$ set, rep ("ARIMA_with_ORG" , times = 7 * 236 ), rep ("ARIMA_without_ORG" , times = 7 * 236 ), rep ("ARIMA_without_predictors" , times = 7 * 236 )),
range = c (prediction_long$ range, rep (0.01 , times = 7 * 236 ), rep (0.01 , times = 7 * 236 ), rep (0.01 , times = 7 * 236 )),
train = c (prediction_long$ train, rep (c ("1785-1900" , "1785-1950" , "1785-1975" , "1785-1990" , "1785-2000" , "1785-2020" , "1900-2020" ), each = 236 ), rep (c ("1785-1900" , "1785-1950" , "1785-1975" , "1785-1990" , "1785-2000" , "1785-2020" , "1900-2020" ), each = 236 ), rep (c ("1785-1900" , "1785-1950" , "1785-1975" , "1785-1990" , "1785-2000" , "1785-2020" , "1900-2020" ), each = 236 ))
)
prediction_longX <- prediction_longX[order (prediction_longX$ set, prediction_longX$ train, prediction_longX$ year), ]
prediction_longX$ predict_end <- ifelse (prediction_longX$ year == 2020 , prediction_longX$ predict_end, NA )
prediction_longX$ train_end <- ifelse (prediction_longX$ year == 2020 , prediction_longX$ train_end, NA )
prediction_longX$ range <- 0.01
prediction_longX$ predict_start <- ifelse (prediction_longX$ set == "ARIMA_with_ORG" & prediction_longX$ train == "1900-2020" , 1785 , prediction_longX$ predict_start)
prediction_longX$ predict_end <- ifelse (prediction_longX$ set == "ARIMA_with_ORG" & prediction_longX$ train == "1900-2020" , 1900 , prediction_longX$ predict_end)
prediction_longX$ predict_end <- ifelse (prediction_longX$ year == 2020 , prediction_longX$ predict_end, NA )
prediction_longX$ train_end <- ifelse (prediction_longX$ year == 2020 , prediction_longX$ train_end, NA )
agg_avg <- aggregate (prediction_longX$ value, by = list (prediction_longX$ set, prediction_longX$ train), median, na.rm = TRUE )
agg_avg$ idx <- interaction (agg_avg$ Group.1 , agg_avg$ Group.2 )
prediction_longX$ idx <- interaction (prediction_longX$ set, prediction_longX$ train)
prediction_longX$ value_avg <- agg_avg[match (prediction_longX$ idx, agg_avg$ idx), "x" ]
prediction_longX$ value_x <- prediction_longX$ value - prediction_longX$ value_avg + mean (prediction_longX$ cl.share, na.rm = TRUE )
prediction_longX$ TRAIN <- factor (car:: Recode (prediction_longX$ train, "'1785-1900'='A) 1785-1900';'1785-1950'='B) 1785-1950';'1785-1975'='C) 1785-1975';'1785-1990'='D) 1785-1990';'1785-2000'='E) 1785-2000';'1900-2020'='F) 1900-2020';'1785-2020'='X) 1785-2020'" ), ordered = TRUE , levels = c ("A) 1785-1900" , "B) 1785-1950" , "C) 1785-1975" , "D) 1785-1990" , "E) 1785-2000" , "F) 1900-2020" , "X) 1785-2020" ))
prediction_longX$ SET <- factor (
car:: Recode (
prediction_longX$ set,
" 'ARIMA_with_ORG' ='T1) ARIMA_T-MG-O';
'ARIMA_without_ORG' ='T2) ARIMA_T-MG-.';
'ARIMA_without_predictors' ='T3) ARIMA_T-..-.';
'with_ORG' ='T4) Non-ARIMA_.-MG-O';
'without_ORG' ='T5) Non-ARIMA_.-MG-.'"
),
ordered = TRUE , levels = c (
"T1) ARIMA_T-MG-O" ,
"T2) ARIMA_T-MG-." ,
"T3) ARIMA_T-..-." ,
"T4) Non-ARIMA_.-MG-O" ,
"T5) Non-ARIMA_.-MG-."
)
)
prediction_longX[with (prediction_longX, (! is.na (train_end) & TRAIN == "F) 1900-2020" )), "train_end" ] <- 2020
prediction_longX[with (prediction_longX, (! is.na (train_start) & TRAIN == "F) 1900-2020" )), "train_start" ] <- 1900
prediction_longX[with (prediction_longX, (! is.na (predict_end) & TRAIN == "F) 1900-2020" )), "predict_end" ] <- 1900
prediction_longX[with (prediction_longX, (! is.na (predict_start) & TRAIN == "F) 1900-2020" )), "predict_start" ] <- 1785
gg_calibrationX <- ggplot (prediction_longX, aes (x = year)) +
geom_point (size = 1 , shape = 0 , aes (y = cl.share)) +
geom_point (size = 1 , color = "red" , shape = 16 , aes (y = value_x)) +
facet_grid (TRAIN ~ SET, scale = "fixed" ) +
theme_soft () +
ylab ("Crisis Labelling Salience" ) +
scale_y_continuous () +
geom_rect (aes (xmin = train_start, xmax = train_end, ymax = range * 10 ), ymin = 0.01 , fill = "darkblue" , alpha = .25 ) +
geom_rect (aes (xmin = predict_start, xmax = predict_end, ymax = range * 10 ), ymin = 0.01 , fill = "darkred" , alpha = .25 ) +
ylim (- 0.025 , 0.1 )
ggsave (file = "ignore_time.png" , device = "png" , gg_calibrationX, unit = "cm" , width = 20 , height = 14 , scale = 2.00 , dpi = 1200 )
ggplot (subset (prediction_longX, set == "ARIMA_with_ORG" ), aes (x = year)) +
geom_point (shape = 15 , aes (y = cl.share)) +
geom_point (color = "red" , shape = 18 , aes (y = value)) +
geom_rect (aes (xmin = train_start, xmax = train_end, ymax = range), ymin = 0 , fill = "darkblue" , alpha = .25 ) +
geom_rect (aes (xmin = predict_start, xmax = predict_end, ymax = range), ymin = 0 , fill = "darkred" , alpha = .25 ) +
facet_grid (train ~ set, scale = "free_y" ) +
theme_soft () +
ylab ("Crisis Labelling Salience" )
ggsave (file = "gg_calibrationX.svg" , device = "svg" , gg_calibrationX, unit = "cm" , width = 12 , height = 16 , scale = 2.00 , dpi = 1200 )
sqrt (with (subset (prediction_longX, set == "ARIMA_without_ORG" & train == "1785-1900" & year %in% 1900 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "ARIMA_without_ORG" & train == "1785-1950" & year %in% 1950 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "ARIMA_without_ORG" & train == "1785-1975" & year %in% 1975 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "ARIMA_without_ORG" & train == "1785-1990" & year %in% 1990 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "ARIMA_without_ORG" & train == "1785-2000" & year %in% 2000 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "ARIMA_without_ORG" & train == "1785-2020" & year %in% 1785 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "ARIMA_without_ORG" & train == "1900-2020" & year %in% 1785 : 1900 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "ARIMA_without_predictors" & train == "1785-1900" & year %in% 1900 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "ARIMA_without_predictors" & train == "1785-1950" & year %in% 1950 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "ARIMA_without_predictors" & train == "1785-1975" & year %in% 1975 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "ARIMA_without_predictors" & train == "1785-1990" & year %in% 1990 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "ARIMA_without_predictors" & train == "1785-2000" & year %in% 2000 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "ARIMA_without_predictors" & train == "1785-2020" & year %in% 1785 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "ARIMA_without_predictors" & train == "1900-2020" & year %in% 1785 : 1900 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "ARIMA_with_ORG" & train == "1785-1900" & year %in% 1900 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "ARIMA_with_ORG" & train == "1785-1950" & year %in% 1950 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "ARIMA_with_ORG" & train == "1785-1975" & year %in% 1975 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "ARIMA_with_ORG" & train == "1785-1990" & year %in% 1990 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "ARIMA_with_ORG" & train == "1785-2000" & year %in% 2000 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "ARIMA_with_ORG" & train == "1785-2020" & year %in% 1785 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "ARIMA_with_ORG" & train == "1900-2020" & year %in% 1785 : 1900 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "with_ORG" & train == "1785-1900" & year %in% 1900 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "with_ORG" & train == "1785-1950" & year %in% 1950 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "with_ORG" & train == "1785-1975" & year %in% 1975 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "with_ORG" & train == "1785-1990" & year %in% 1990 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "with_ORG" & train == "1785-2000" & year %in% 2000 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "with_ORG" & train == "1785-2020" & year %in% 1785 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "with_ORG" & train == "1900-2020" & year %in% 1785 : 1900 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "without_ORG" & train == "1785-1900" & year %in% 1900 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "without_ORG" & train == "1785-1950" & year %in% 1950 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "without_ORG" & train == "1785-1975" & year %in% 1975 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "without_ORG" & train == "1785-1990" & year %in% 1990 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "without_ORG" & train == "1785-2000" & year %in% 2000 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "without_ORG" & train == "1785-2020" & year %in% 1785 : 2020 ), mean (abs (cl.share - value_x)^ 2 )))
sqrt (with (subset (prediction_longX, set == "without_ORG" & train == "1900-2020" & year %in% 1785 : 1900 ), mean (abs (cl.share - value_x)^ 2 )))
dict_crisis <- dictionary (list (crisis = c ("crisis.*" , "crises.*" )))
dict_disaster <- dictionary (list (disaster = c ("disaster.*" )))
dict_collapse <- dictionary (list (collapse = c ("collaps.*" )))
dict_recession <- dictionary (list (recession = c ("recession.*" )))
dict_emergency <- dictionary (list (emergency = c ("emergency.*" , "emergencies.*" )))
dict_catastrophe <- dictionary (list (catastrophe = c ("catastroph.*" )))
dict_epidemic <- dictionary (list (epidemic = c ("epidemic.*" , "pandemic.*" , "" , "" )))
dict_breakdown <- dictionary (list (breakdown = c ("breakdown.*" )))
dict_debacle <- dictionary (list (debacle = c ("debacl.*" )))
kw_crisis <- tokens_lookup (x = cc.tokens, dictionary = dict_crisis, nomatch = "NO" )
kw_disaster <- tokens_lookup (x = cc.tokens, dictionary = dict_disaster, nomatch = "NO" )
kw_collapse <- tokens_lookup (x = cc.tokens, dictionary = dict_collapse, nomatch = "NO" )
kw_recession <- tokens_lookup (x = cc.tokens, dictionary = dict_recession, nomatch = "NO" )
kw_emergency <- tokens_lookup (x = cc.tokens, dictionary = dict_emergency, nomatch = "NO" )
kw_catastrophe <- tokens_lookup (x = cc.tokens, dictionary = dict_catastrophe, nomatch = "NO" )
kw_epidemic <- tokens_lookup (x = cc.tokens, dictionary = dict_epidemic, nomatch = "NO" )
kw_breakdown <- tokens_lookup (x = cc.tokens, dictionary = dict_breakdown, nomatch = "NO" )
kw_debacle <- tokens_lookup (x = cc.tokens, dictionary = dict_debacle, nomatch = "NO" )
kwsum <- function (x, kw) {
counts <- lapply (x, pattern = kw, FUN = str_count)
kwcount <- as.numeric (lapply (counts, FUN = sum))
return (kwcount)
}
kw_crisis_sum <- kwsum (x = kw_crisis, kw = "crisis" )
kw_disaster_sum <- kwsum (x = kw_disaster, kw = "disaster" )
kw_collapse_sum <- kwsum (x = kw_collapse, kw = "collapse" )
kw_recession_sum <- kwsum (x = kw_recession, kw = "recession" )
kw_emergency_sum <- kwsum (x = kw_emergency, kw = "emergency" )
kw_catastrophe_sum <- kwsum (x = kw_catastrophe, kw = "catastrophe" )
kw_epidemic_sum <- kwsum (x = kw_epidemic, kw = "epidemic" )
kw_breakdown_sum <- kwsum (x = kw_breakdown, kw = "breakdown" )
kw_debacle_sum <- kwsum (x = kw_debacle, kw = "debacle" )
prop.table (table (kw_crisis_sum > 0 ))
prop.table (table (kw_disaster_sum > 0 ))
prop.table (table (kw_collapse_sum > 0 ))
prop.table (table (kw_recession_sum > 0 ))
prop.table (table (kw_emergency_sum > 0 ))
prop.table (table (kw_catastrophe_sum > 0 ))
prop.table (table (kw_epidemic_sum > 0 ))
prop.table (table (kw_breakdown_sum > 0 ))
prop.table (table (kw_debacle_sum > 0 ))
with (subset (prediction_longX, set == "ARIMA_with_ORG" & train == "1785-1900" & year %in% 1900 : 2020 ), cor.test (cl.share, value))$ estimate^ 2
with (subset (prediction_longX, set == "ARIMA_with_ORG" & train == "1785-1950" & year %in% 1950 : 2020 ), cor.test (cl.share, value))$ estimate^ 2
with (subset (prediction_longX, set == "ARIMA_with_ORG" & train == "1785-1975" & year %in% 1975 : 2020 ), cor.test (cl.share, value))$ estimate^ 2
with (subset (prediction_longX, set == "ARIMA_with_ORG" & train == "1785-1990" & year %in% 1990 : 2020 ), cor.test (cl.share, value))$ estimate^ 2
with (subset (prediction_longX, set == "ARIMA_with_ORG" & train == "1785-2000" & year %in% 2000 : 2020 ), cor.test (cl.share, value))$ estimate^ 2
with (subset (prediction_longX, set == "ARIMA_with_ORG" & train == "1785-2020" & year %in% 1785 : 2020 ), cor.test (cl.share, value))$ estimate^ 2
with (subset (prediction_longX, set == "ARIMA_with_ORG" & train == "1900-2020" & year %in% 1785 : 1900 ), cor.test (cl.share, value))$ estimate^ 2
with (subset (prediction_longX, set == "with_ORG" & train == "1785-1900" & year %in% 1900 : 2020 ), cor.test (cl.share, value))$ estimate^ 2
with (subset (prediction_longX, set == "with_ORG" & train == "1785-1950" & year %in% 1950 : 2020 ), cor.test (cl.share, value))$ estimate^ 2
with (subset (prediction_longX, set == "with_ORG" & train == "1785-1975" & year %in% 1975 : 2020 ), cor.test (cl.share, value))$ estimate^ 2
with (subset (prediction_longX, set == "with_ORG" & train == "1785-1990" & year %in% 1990 : 2020 ), cor.test (cl.share, value))$ estimate^ 2
with (subset (prediction_longX, set == "with_ORG" & train == "1785-2000" & year %in% 2000 : 2020 ), cor.test (cl.share, value))$ estimate^ 2
with (subset (prediction_longX, set == "with_ORG" & train == "1785-2020" & year %in% 1785 : 2020 ), cor.test (cl.share, value))$ estimate^ 2
with (subset (prediction_longX, set == "with_ORG" & train == "1900-2020" & year %in% 1785 : 1900 ), cor.test (cl.share, value))$ estimate^ 2
with (subset (prediction_longX, set == "without_ORG" & train == "1785-1900" & year %in% 1900 : 2020 ), cor.test (cl.share, value))$ estimate^ 2
with (subset (prediction_longX, set == "without_ORG" & train == "1785-1950" & year %in% 1950 : 2020 ), cor.test (cl.share, value))$ estimate^ 2
with (subset (prediction_longX, set == "without_ORG" & train == "1785-1975" & year %in% 1975 : 2020 ), cor.test (cl.share, value))$ estimate^ 2
with (subset (prediction_longX, set == "without_ORG" & train == "1785-1990" & year %in% 1990 : 2020 ), cor.test (cl.share, value))$ estimate^ 2
with (subset (prediction_longX, set == "without_ORG" & train == "1785-2000" & year %in% 2000 : 2020 ), cor.test (cl.share, value))$ estimate^ 2
with (subset (prediction_longX, set == "without_ORG" & train == "1785-2020" & year %in% 1785 : 2020 ), cor.test (cl.share, value))$ estimate^ 2
with (subset (prediction_longX, set == "without_ORG" & train == "1900-2020" & year %in% 1785 : 1900 ), cor.test (cl.share, value))$ estimate^ 2
stargazer (lm_CL1, lm_CL2, lm_CL7, lm_CL6, lm_CL5, lm_CL4, lm_CL3, type = "text" )
stargazer (lm_CL1X, lm_CL2X, lm_CL7X, lm_CL6X, lm_CL5X, lm_CL4X, lm_CL3X, type = "text" )
lm_CLX1 <- lm (cl.share ~ Penetration * media_autonomy_i, data = thetimes)
lm_CLX2 <- lm (cl.share ~ statistics + spending_GDP + gini_rev_i + ministries, data = thetimes)
lm_CLX3 <- lm (cl.share ~ ccORG1000 + ccPERSON_per_article, data = thetimes)
lm_CLX4 <- lm (cl.share ~ Penetration * media_autonomy_i + statistics + spending_GDP + gini_rev_i + ministries, data = thetimes)
lm_CLX5 <- lm (cl.share ~ Penetration * media_autonomy_i + statistics + spending_GDP + gini_rev_i + ministries + ccORG1000 + ccPERSON_per_article, data = thetimes)
stargazer (lm_CLX1, lm_CLX2, lm_CLX3, lm_CLX4, lm_CLX5, type = "text" )
ORGlm_3Y <- (lm (ccORG1000 ~ spending_GDP + gini_rev_i + Penetration + media_autonomy_i + 1 + dyear, data = thetimes))
ORGlm_3 <- (lm (ccORG1000 ~ spending_GDP + gini_rev_i + Penetration + media_autonomy_i + 1 , data = thetimes))
ORGlm_2Y <- (lm (ccORG1000 ~ spending_GDP + gini_rev_i + 1 + dyear, data = thetimes))
ORGlm_2 <- (lm (ccORG1000 ~ spending_GDP + gini_rev_i + 1 , data = thetimes))
ORGlm_1Y <- (lm (ccORG1000 ~ Penetration + media_autonomy_i + 1 + dyear, data = thetimes))
ORGlm_1 <- (lm (ccORG1000 ~ Penetration + media_autonomy_i + 1 , data = thetimes))
ORGlm_0Y <- (lm (ccORG1000 ~ 1 + dyear, data = thetimes))
ORGlm_0 <- (lm (ccORG1000 ~ 1 , data = thetimes))
stargazer (ORGlm_0, ORGlm_1, ORGlm_2, ORGlm_3, type = "html" , intercept.bottom = FALSE , star.cutoffs = c (.05 , .01 , .001 ), single.row = TRUE )
stargazer (ORGlm_0Y, ORGlm_1Y, ORGlm_2Y, ORGlm_3Y, type = "html" , intercept.bottom = FALSE , star.cutoffs = c (.05 , .01 , .001 ), single.row = TRUE )
anova (ORGlm_1, ORGlm_3)
anova (ORGlm_2, ORGlm_3)
thetimes$ CLS_100 <- Arima (thetimes$ cl.share, order = c (1 , 0 , 0 ))$ resid
thetimes$ CLS_010 <- Arima (thetimes$ cl.share, order = c (0 , 1 , 0 ))$ resid
thetimes$ CLS_002 <- Arima (thetimes$ cl.share, order = c (0 , 0 , 2 ))$ resid
CClm_x0 <- (lm (CLS_002 ~ 1 , data = thetimes))
CClm_x1 <- (lm (CLS_002 ~ Penetration + media_autonomy_i + 1 , data = thetimes))
CClm_x1b <- (lm (CLS_002 ~ spending_GDP + gini_rev_i + 1 , data = thetimes))
CClm_x1c <- (lm (CLS_002 ~ ccORG_per_article + 1 , data = thetimes))
CClm_x2 <- (lm (CLS_002 ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + 1 , data = thetimes))
CClm_x3 <- (lm (CLS_002 ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG_per_article + 1 , data = thetimes))
CClm_0 <- (lm (CLS_010 ~ 1 , data = thetimes))
CClm_1 <- (lm (CLS_010 ~ Penetration + media_autonomy_i + 1 , data = thetimes))
CClm_1b <- (lm (CLS_010 ~ spending_GDP + gini_rev_i + 1 , data = thetimes))
CClm_1c <- (lm (CLS_010 ~ ccORG_per_article + 1 , data = thetimes))
CClm_2 <- (lm (CLS_010 ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + 1 , data = thetimes))
CClm_3 <- (lm (CLS_010 ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG_per_article + 1 , data = thetimes))
CClm_0 <- (lm (CLS_100 ~ 1 , data = thetimes))
CClm_1 <- (lm (CLS_100 ~ Penetration + media_autonomy_i + 1 , data = thetimes))
CClm_1b <- (lm (CLS_100 ~ spending_GDP + gini_rev_i + 1 , data = thetimes))
CClm_1c <- (lm (CLS_100 ~ ccORG_per_article + 1 , data = thetimes))
CClm_2 <- (lm (CLS_100 ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + 1 , data = thetimes))
CClm_3 <- (lm (CLS_100 ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG_per_article + 1 , data = thetimes))
thetimes$ CL_pcent <- thetimes$ cl.share * 100
thetimes$ CNW_pcent <- thetimes$ cnw.share * 100
thetimes$ dyear <- thetimes$ year - 1785
CClm_0 <- (lm (CL_pcent ~ 1 , data = thetimes))
CClm_1 <- (lm (CL_pcent ~ Penetration + media_autonomy_i + 1 , data = thetimes))
CClm_1b <- (lm (CL_pcent ~ spending_GDP + gini_rev_i + 1 , data = thetimes))
CClm_1c <- (lm (CL_pcent ~ ccORG1000 + 1 , data = thetimes))
CClm_2 <- (lm (CL_pcent ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + 1 , data = thetimes))
CClm_3 <- (lm (CL_pcent ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG1000 + 1 , data = thetimes))
CClm_0Y <- (lm (CL_pcent ~ dyear + 1 , data = thetimes))
CClm_1aY <- (lm (CL_pcent ~ Penetration + media_autonomy_i + dyear, data = thetimes))
CClm_1bY <- (lm (CL_pcent ~ spending_GDP + gini_rev_i + dyear, data = thetimes))
CClm_1cY <- (lm (CL_pcent ~ ccORG1000 + dyear, data = thetimes))
CClm_2Y <- (lm (CL_pcent ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + 1 + dyear, data = thetimes))
CClm_3Y <- (lm (CL_pcent ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG1000 + 1 + dyear, data = thetimes))
anova (CClm_0, CClm_1)
stargazer (CClm_0, CClm_1, CClm_1b, CClm_1c, CClm_2, CClm_3, type = "html" , intercept.bottom = FALSE , star.cutoffs = c (.05 , .01 , .001 ), single.row = TRUE )
stargazer (CClm_0Y, CClm_1aY, CClm_1bY, CClm_1cY, CClm_2Y, CClm_3Y, type = "html" , intercept.bottom = FALSE , star.cutoffs = c (.05 , .01 , .001 ), single.row = TRUE )
anova (CClm_0, CClm_1, CClm_2, CClm_3)
stargazer (ORGlm_0, ORGlm_1, ORGlm_2, ORGlm_3, type = "html" , intercept.bottom = FALSE , star.cutoffs = c (.05 , .01 , .001 ), single.row = TRUE )
stargazer (CClm_0, CClm_1, CClm_1b, CClm_1c, CClm_2, CClm_3, CClm_Y, type = "text" )
med_penetration <- mediate (model.m = ORGlm_3, model.y = CClm_3Y, mediator = "ccORG1000" , treat = "Penetration" )
med_autonomy <- mediate (model.m = ORGlm_3, model.y = CClm_3Y, mediator = "ccORG1000" , treat = "media_autonomy_i" )
med_spending <- mediate (model.m = ORGlm_3, model.y = CClm_3Y, mediator = "ccORG1000" , treat = "spending_GDP" )
med_diversity <- mediate (model.m = ORGlm_3, model.y = CClm_3Y, mediator = "ccORG1000" , treat = "gini_rev_i" )
sensitivity_penetration <- medsens (med_penetration, effect.type = "both" )
sensitivity_autonomy <- medsens (med_autonomy, effect.type = "both" )
sensitivity_spending <- medsens (med_spending, effect.type = "both" )
sensitivity_diversity <- medsens (med_diversity, effect.type = "both" )
thetimes$ ccORG1000 <- thetimes$ ccORG_per_article * 1000
thetimes$ logCNW <- log (thetimes$ cnw + 1 )
lmodel <- "
CL_pcent ~ 1 + d*ccORG1000 + spending_GDP + gini_rev_i + Penetration + media_autonomy_i
CNW_pcent ~ 1 + c*ccORG1000 + spending_GDP + gini_rev_i + Penetration + media_autonomy_i
logCNW ~ 1 + b*ccORG1000 + spending_GDP + gini_rev_i + Penetration + media_autonomy_i
ccORG1000 ~ 1 + a1*spending_GDP + a2*gini_rev_i + a3*Penetration + a4*media_autonomy_i
intensity_cls:=a1*d
intensity_cnws:=a1*c
intensity_cnw:=a1*b
diversity_cls:=a2*d
diversity_cnws:=a2*c
diversity_cnw:=a2*b
penetration_cls:=a3*d
penetration_cnws:=a3*c
penetration_cnw:=a3*b
autonomy_cls:=a4*d
autonomy_cnws:=a4*c
autonomy_cnw:=a4*b
"
lmodel2 <- "
CL_pcent ~ 1 + d*ccORG1000
CNW_pcent ~ 1 + c*ccORG1000
logCNW ~ 1 + b*ccORG1000
ccORG1000 ~ 1 + a1*spending_GDP + a2*gini_rev_i + a3*Penetration + a4*media_autonomy_i
intensity_cls:=a1*d
intensity_cnws:=a1*c
intensity_cnw:=a1*b
diversity_cls:=a2*d
diversity_cnws:=a2*c
diversity_cnw:=a2*b
penetration_cls:=a3*d
penetration_cnws:=a3*c
penetration_cnw:=a3*b
autonomy_cls:=a4*d
autonomy_cnws:=a4*c
autonomy_cnw:=a4*b
"
lmodel3 <- "
CL_pcent ~ spending_GDP + gini_rev_i + Penetration + media_autonomy_i
CNW_pcent ~ spending_GDP + gini_rev_i + Penetration + media_autonomy_i
logCNW ~ spending_GDP + gini_rev_i + Penetration + media_autonomy_i
"
lfit <- sem (model = lmodel, data = thetimes)
lfit2 <- sem (model = lmodel2, data = thetimes)
lfit3 <- sem (model = lmodel3, data = thetimes)
CNWlm_0x <- (lm (CNW_pcent ~ 1 + ccORG1000 + articles, data = thetimes))
CNWlm_0 <- (lm (CNW_pcent ~ 1 , data = thetimes))
CNWlm_0Y0 <- (lm (CNW_pcent ~ 0 + dyear, data = thetimes))
CNWlm_1a <- (lm (CNW_pcent ~ Penetration + media_autonomy_i, data = thetimes))
CNWlm_1b <- (lm (CNW_pcent ~ spending_GDP + gini_rev_i, data = thetimes))
CNWlm_2 <- (lm (CNW_pcent ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i, data = thetimes))
CNWlm_3 <- (lm (CNW_pcent ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG1000, data = thetimes))
CNWlm_0Y <- (lm (CNW_pcent ~ dyear, data = thetimes))
CNWlm_1aY <- (lm (CNW_pcent ~ Penetration + media_autonomy_i + dyear, data = thetimes))
CNWlm_1bY <- (lm (CNW_pcent ~ spending_GDP + gini_rev_i + dyear, data = thetimes))
CNWlm_2Y <- (lm (CNW_pcent ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + dyear, data = thetimes))
CNWlm_3Y <- (lm (CNW_pcent ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG1000 + dyear, data = thetimes))
stargazer (CNWlm_0, CNWlm_1a, CNWlm_1b, CNWlm_2, CNWlm_3, type = "html" , intercept.bottom = FALSE , star.cutoffs = c (.05 , .01 , .001 ), single.row = TRUE )
stargazer (CNWlm_0Y, CNWlm_1aY, CNWlm_1bY, CNWlm_2Y, CNWlm_3Y, type = "html" , intercept.bottom = FALSE , star.cutoffs = c (.05 , .01 , .001 ), single.row = TRUE )
anova (CNWlm_1, CNWlm_2, CNWlm_3)
stargazer (CNWlm_1, CNWlm_2, CNWlm_3, type = "text" )
med_penetration2 <- mediate (model.m = ORGlm_3, model.y = CNWlm_3Y, mediator = "ccORG1000" , treat = "Penetration" )
med_autonomy2 <- mediate (model.m = ORGlm_3, model.y = CNWlm_3Y, mediator = "ccORG1000" , treat = "media_autonomy_i" )
med_spending2 <- mediate (model.m = ORGlm_3, model.y = CNWlm_3Y, mediator = "ccORG1000" , treat = "spending_GDP" )
med_diversity2 <- mediate (model.m = ORGlm_3, model.y = CNWlm_3Y, mediator = "ccORG1000" , treat = "gini_rev_i" )
sensitivity_penetration2 <- medsens (med_penetration2, effect.type = "both" )
sensitivity_autonomy2 <- medsens (med_autonomy2, effect.type = "both" )
sensitivity_spending2 <- medsens (med_spending2, effect.type = "both" )
sensitivity_diversity2 <- medsens (med_diversity2, effect.type = "both" )
thetimes$ logCNW <- log (thetimes$ cnw + 1 )
COUNTlm_0 <- (lm (logCNW ~ 1 , data = thetimes))
COUNTlm_1a <- (lm (logCNW ~ Penetration + media_autonomy_i, data = thetimes))
COUNTlm_1b <- (lm (logCNW ~ spending_GDP + gini_rev_i, data = thetimes))
COUNTlm_1c <- (lm (logCNW ~ ccORG1000, data = thetimes))
COUNTlm_2 <- (lm (logCNW ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i, data = thetimes))
COUNTlm_3 <- (lm (logCNW ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG1000, data = thetimes))
COUNTlm_0Y <- (lm (logCNW ~ 1 + dyear, data = thetimes))
COUNTlm_1aY <- (lm (logCNW ~ Penetration + media_autonomy_i + dyear, data = thetimes))
COUNTlm_1bY <- (lm (logCNW ~ spending_GDP + gini_rev_i + dyear, data = thetimes))
COUNTlm_1cY <- (lm (logCNW ~ ccORG1000 + dyear, data = thetimes))
COUNTlm_2Y <- (lm (logCNW ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + dyear, data = thetimes))
COUNTlm_3Y <- (lm (logCNW ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG1000 + dyear, data = thetimes))
stargazer (COUNTlm_0, COUNTlm_1a, COUNTlm_1b, COUNTlm_1c, COUNTlm_2, COUNTlm_3, type = "html" , intercept.bottom = FALSE , star.cutoffs = c (.05 , .01 , .001 ), single.row = TRUE )
stargazer (COUNTlm_0Y, COUNTlm_1aY, COUNTlm_1bY, COUNTlm_1cY, COUNTlm_2Y, COUNTlm_3Y, type = "html" , intercept.bottom = FALSE , star.cutoffs = c (.05 , .01 , .001 ), single.row = TRUE )
COUNTlm_0 <- (MASS:: glm.nb (cnw ~ 1 , data = thetimes))
COUNTlm_1 <- (glm.nb (cnw ~ Penetration + media_autonomy_i, data = thetimes))
COUNTlm_1b <- (glm.nb (cnw ~ spending_GDP + gini_rev_i, data = thetimes))
COUNTlm_1c <- (glm.nb (cnw ~ ccORG_per_article, data = thetimes))
COUNTlm_2 <- (glm.nb (cnw ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i, data = thetimes))
COUNTlm_3 <- (glm.nb (I (cnw + 1 ) ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG_per_article, data = thetimes))
COUNTlm_3X <- (glm (cnw ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG_per_article, data = thetimes))
COUNTglm_0 <- (glm (cnw ~ 1 , data = thetimes, family = "poisson" ))
COUNTglm_1 <- (glm (cnw ~ 1 + Penetration + media_autonomy_i, data = thetimes, family = "poisson" ))
COUNTglm_1b <- (glm (cnw ~ 1 + spending_GDP + gini_rev_i, data = thetimes, family = "poisson" ))
COUNTglm_1c <- (glm (cnw ~ 1 + ccORG_per_article, data = thetimes, family = "poisson" ))
COUNTglm_2 <- (glm (cnw ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i, data = thetimes, family = "poisson" ))
COUNTglm_3 <- (glm (cnw ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG_per_article, data = thetimes, family = "poisson" ))
COUNTglm_3 <- (lm (log (cnw + 1 ) ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG_per_article, data = thetimes))
COUNTziglm_0 <- (zeroinfl (cnw ~ 1 + dyear, data = thetimes, dist = "poisson" ))
COUNTziglm_1 <- (zeroinfl (cnw ~ 1 + Penetration + media_autonomy_i + dyear, data = thetimes, dist = "poisson" ))
COUNTziglm_1b <- (zeroinfl (cnw ~ 1 + spending_GDP + gini_rev_i + dyear, data = thetimes, dist = "poisson" ))
COUNTziglm_1c <- (zeroinfl (cnw ~ 1 + ccORG_per_article + dyear, data = thetimes, dist = "poisson" ))
COUNTziglm_2 <- (zeroinfl (cnw ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i + dyear, data = thetimes, dist = "poisson" ))
COUNTziglm_3 <- (zeroinfl (cnw ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG_per_article + dyear, data = thetimes, dist = "poisson" ))
COUNTlm_0 <- (lm (log (cnw + 1 ) ~ 1 + year, data = thetimes))
COUNTlm_1 <- (lm (log (cnw + 1 ) ~ 1 + Penetration + media_autonomy_i + year, data = thetimes))
COUNTlm_1b <- (lm (log (cnw + 1 ) ~ 1 + spending_GDP + gini_rev_i + year, data = thetimes))
COUNTlm_1c <- (lm (log (cnw + 1 ) ~ 1 + ccORG_per_article + year, data = thetimes))
COUNTlm_2 <- (lm (log (cnw + 1 ) ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i + year, data = thetimes))
COUNTlm_3 <- (lm (log (cnw + 1 ) ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG_per_article + year, data = thetimes))
anova (COUNTlm_0, COUNTlm_1)
anova (COUNTlm_0, COUNTlm_1b)
anova (COUNTlm_0, COUNTlm_1c)
anova (COUNTlm_2, COUNTlm_3)
stargazer (COUNTlm_1, COUNTlm_1b, COUNTlm_1c, COUNTlm_2, COUNTlm_3, type = "text" )
cor.test (thetimes$ CNW, predict.glm (COUNTlm_0))$ est^ 2
cor.test (thetimes$ CNW, predict.glm (COUNTlm_1))$ est^ 2
cor.test (thetimes$ CNW, predict.glm (COUNTlm_1b))$ est^ 2
cor.test (thetimes$ CNW, predict.glm (COUNTlm_1c))$ est^ 2
cor.test (thetimes$ CNW, predict.glm (COUNTlm_2))$ est^ 2
cor.test (thetimes$ CNW, predict.glm (COUNTlm_3))$ est^ 2
med_penetration3 <- mediate (model.m = ORGlm_3, model.y = COUNTlm_3Y, mediator = "ccORG1000" , treat = "Penetration" )
med_autonomy3 <- mediate (model.m = ORGlm_3, model.y = COUNTlm_3Y, mediator = "ccORG1000" , treat = "media_autonomy_i" )
med_spending3 <- mediate (model.m = ORGlm_3, model.y = COUNTlm_3Y, mediator = "ccORG1000" , treat = "spending_GDP" )
med_diversity3 <- mediate (model.m = ORGlm_3, model.y = COUNTlm_3Y, mediator = "ccORG1000" , treat = "gini_rev_i" )
med_penetration3X <- mediate (model.m = ORGlm_1, model.y = COUNTlm_3X, mediator = "ccORG1000" , treat = "Penetration" )
med_autonomy3X <- mediate (model.m = ORGlm_1, model.y = COUNTlm_3X, mediator = "ccORG1000" , treat = "media_autonomy_i" )
med_spending3X <- mediate (model.m = ORGlm_2, model.y = COUNTlm_3X, mediator = "ccORG1000" , treat = "spending_GDP" )
med_diversity3X <- mediate (model.m = ORGlm_2, model.y = COUNTlm_3X, mediator = "ccORG1000" , treat = "gini_rev_i" )
sensitivity_penetration3 <- medsens (med_penetration3, effect.type = "both" )
sensitivity_autonomy3 <- medsens (med_autonomy3, effect.type = "both" )
sensitivity_spending3 <- medsens (med_spending3, effect.type = "both" )
sensitivity_diversity3 <- medsens (med_diversity3, effect.type = "both" )
thetimes$ CL_z <- 100 * scale (thetimes$ CL_pcent, scale = TRUE )
thetimes$ CNW_z <- 100 * scale (thetimes$ CNW_pcent, scale = TRUE )
thetimes$ CL_mmx <- (thetimes$ CL_pcent / max (thetimes$ CL_pcent, na.rm = TRUE ))
thetimes$ CNW_mmx <- (thetimes$ CNW_pcent / max (thetimes$ CNW_pcent, na.rm = TRUE ))
thetimes$ CL_mm <- 100 * (thetimes$ CL_mmx - mean (thetimes$ CL_mmx, na.rm = TRUE ))
thetimes$ CNW_mm <- 100 * (thetimes$ CNW_mmx - mean (thetimes$ CNW_mmx, na.rm = TRUE ))
CNWlm_0z <- (lm (CNW_z ~ 1 , data = thetimes))
CNWlm_tz <- (lm (CNW_z ~ dyear, data = thetimes))
CNWlm_1z <- (lm (CNW_z ~ Penetration + media_autonomy_i, data = thetimes))
CNWlm_2z <- (lm (CNW_z ~ spending_GDP + gini_rev_i, data = thetimes))
CNWlm_3z <- (lm (CNW_z ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i, data = thetimes))
CNWlm_4z <- (lm (CNW_z ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG1000, data = thetimes))
CNWlm_0mm <- (lm (CNW_mm ~ 1 , data = thetimes))
CNWlm_tmm <- (lm (CNW_mm ~ dyear, data = thetimes))
CNWlm_1mm <- (lm (CNW_mm ~ Penetration + media_autonomy_i, data = thetimes))
CNWlm_2mm <- (lm (CNW_mm ~ spending_GDP + gini_rev_i, data = thetimes))
CNWlm_3mm <- (lm (CNW_mm ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i, data = thetimes))
CNWlm_4mm <- (lm (CNW_mm ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG1000, data = thetimes))
CLlm_0z <- (lm (CL_z ~ 1 , data = thetimes))
CLlm_tz <- (lm (CL_z ~ dyear, data = thetimes))
CLlm_1z <- (lm (CL_z ~ Penetration + media_autonomy_i, data = thetimes))
CLlm_2z <- (lm (CL_z ~ spending_GDP + gini_rev_i, data = thetimes))
CLlm_3z <- (lm (CL_z ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i, data = thetimes))
CLlm_4z <- (lm (CL_z ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG1000, data = thetimes))
CLlm_0mm <- (lm (CL_mm ~ 1 , data = thetimes))
CLlm_tmm <- (lm (CL_mm ~ dyear, data = thetimes))
CLlm_1mm <- (lm (CL_mm ~ Penetration + media_autonomy_i, data = thetimes))
CLlm_2mm <- (lm (CL_mm ~ spending_GDP + gini_rev_i, data = thetimes))
CLlm_3mm <- (lm (CL_mm ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i, data = thetimes))
CLlm_4mm <- (lm (CL_mm ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG1000, data = thetimes))
df_comp <- data.frame (
Criterion = rep (c ("CL" , "CNW" ), each = 18 ),
Predictor = factor (rep (rep (c ("Time" , "Crisis Sponsors" , "Media Penetr." , "Media Auton." , "Gov. sp. intensity" , "Gov. sp. diversity" ), each = 3 ), times = 2 ), ordered = TRUE , levels = c ("Time" , "Crisis Sponsors" , "Media Penetr." , "Media Auton." , "Gov. sp. intensity" , "Gov. sp. diversity" )),
Standardization = factor (rep (c ("Unstandardized" , "MixMax-standardized" , "z-standardized" ), times = 12 ), ordered = TRUE , levels = c ("Unstandardized" , "MixMax-standardized" , "z-standardized" )),
coef =
c (
CLlm_t$ coef[2 ], CLlm_tmm$ coef[2 ], CLlm_tz$ coef[2 ],
CLlm_4$ coef[6 ], CLlm_4mm$ coef[6 ], CLlm_4z$ coef[6 ],
CLlm_1$ coef[2 ], CLlm_1mm$ coef[2 ], CLlm_1z$ coef[2 ],
CLlm_1$ coef[3 ], CLlm_1mm$ coef[3 ], CLlm_1z$ coef[3 ],
CLlm_2$ coef[2 ], CLlm_2mm$ coef[2 ], CLlm_2z$ coef[2 ],
CLlm_2$ coef[3 ], CLlm_2mm$ coef[3 ], CLlm_2z$ coef[3 ],
CNWlm_t$ coef[2 ], CNWlm_tmm$ coef[2 ], CNWlm_tz$ coef[2 ],
CNWlm_4$ coef[6 ], CNWlm_4mm$ coef[6 ], CNWlm_4z$ coef[6 ],
CNWlm_1$ coef[2 ], CNWlm_1mm$ coef[2 ], CNWlm_1z$ coef[2 ],
CNWlm_1$ coef[3 ], CNWlm_1mm$ coef[3 ], CNWlm_1z$ coef[3 ],
CNWlm_2$ coef[2 ], CNWlm_2mm$ coef[2 ], CNWlm_2z$ coef[2 ],
CNWlm_2$ coef[3 ], CNWlm_2mm$ coef[3 ], CNWlm_2z$ coef[3 ]
),
LL95 =
c (
confint (CLlm_t)[2 , 1 ], confint (CLlm_tmm)[2 , 1 ], confint (CLlm_tz)[2 , 1 ],
confint (CLlm_4)[6 , 1 ], confint (CLlm_4mm)[6 , 1 ], confint (CLlm_4z)[6 , 1 ],
confint (CLlm_1)[2 , 1 ], confint (CLlm_1mm)[2 , 1 ], confint (CLlm_1z)[2 , 1 ],
confint (CLlm_1)[3 , 1 ], confint (CLlm_1mm)[3 , 1 ], confint (CLlm_1z)[3 , 1 ],
confint (CLlm_2)[2 , 1 ], confint (CLlm_2mm)[2 , 1 ], confint (CLlm_2z)[2 , 1 ],
confint (CLlm_2)[3 , 1 ], confint (CLlm_2mm)[3 , 1 ], confint (CLlm_2z)[3 , 1 ],
confint (CNWlm_t)[2 , 1 ], confint (CNWlm_tmm)[2 , 1 ], confint (CNWlm_tz)[2 , 1 ],
confint (CNWlm_4)[6 , 1 ], confint (CNWlm_4mm)[6 , 1 ], confint (CNWlm_4z)[6 , 1 ],
confint (CNWlm_1)[2 , 1 ], confint (CNWlm_1mm)[2 , 1 ], confint (CNWlm_1z)[2 , 1 ],
confint (CNWlm_1)[3 , 1 ], confint (CNWlm_1mm)[3 , 1 ], confint (CNWlm_1z)[3 , 1 ],
confint (CNWlm_2)[2 , 1 ], confint (CNWlm_2mm)[2 , 1 ], confint (CNWlm_2z)[2 , 1 ],
confint (CNWlm_2)[3 , 1 ], confint (CNWlm_2mm)[3 , 1 ], confint (CNWlm_2z)[3 , 1 ]
),
UL95 =
c (
confint (CLlm_t)[2 , 2 ], confint (CLlm_tmm)[2 , 2 ], confint (CLlm_tz)[2 , 2 ],
confint (CLlm_4)[6 , 2 ], confint (CLlm_4mm)[6 , 2 ], confint (CLlm_4z)[6 , 2 ],
confint (CLlm_1)[2 , 2 ], confint (CLlm_1mm)[2 , 2 ], confint (CLlm_1z)[2 , 2 ],
confint (CLlm_1)[3 , 2 ], confint (CLlm_1mm)[3 , 2 ], confint (CLlm_1z)[3 , 2 ],
confint (CLlm_2)[2 , 2 ], confint (CLlm_2mm)[2 , 2 ], confint (CLlm_2z)[2 , 2 ],
confint (CLlm_2)[3 , 2 ], confint (CLlm_2mm)[3 , 2 ], confint (CLlm_2z)[3 , 2 ],
confint (CNWlm_t)[2 , 2 ], confint (CNWlm_tmm)[2 , 2 ], confint (CNWlm_tz)[2 , 2 ],
confint (CNWlm_4)[6 , 2 ], confint (CNWlm_4mm)[6 , 2 ], confint (CNWlm_4z)[6 , 2 ],
confint (CNWlm_1)[2 , 2 ], confint (CNWlm_1mm)[2 , 2 ], confint (CNWlm_1z)[2 , 2 ],
confint (CNWlm_1)[3 , 2 ], confint (CNWlm_1mm)[3 , 2 ], confint (CNWlm_1z)[3 , 2 ],
confint (CNWlm_2)[2 , 2 ], confint (CNWlm_2mm)[2 , 2 ], confint (CNWlm_2z)[2 , 2 ],
confint (CNWlm_2)[3 , 2 ], confint (CNWlm_2mm)[3 , 2 ], confint (CNWlm_2z)[3 , 2 ]
),
maxim =
c (
CLlm_t$ coef[2 ], CLlm_tmm$ coef[2 ], CLlm_tz$ coef[2 ],
CLlm_4$ coef[6 ], CLlm_4mm$ coef[6 ], CLlm_4z$ coef[6 ],
CLlm_1$ coef[2 ], CLlm_1mm$ coef[2 ], CLlm_1z$ coef[2 ],
CLlm_1$ coef[3 ], CLlm_1mm$ coef[3 ], CLlm_1z$ coef[3 ],
CLlm_2$ coef[2 ], CLlm_2mm$ coef[2 ], CLlm_2z$ coef[2 ],
CLlm_2$ coef[3 ], CLlm_2mm$ coef[3 ], CLlm_2z$ coef[3 ],
CLlm_t$ coef[2 ], CLlm_tmm$ coef[2 ], CLlm_tz$ coef[2 ],
CLlm_4$ coef[6 ], CLlm_4mm$ coef[6 ], CLlm_4z$ coef[6 ],
CLlm_1$ coef[2 ], CLlm_1mm$ coef[2 ], CLlm_1z$ coef[2 ],
CLlm_1$ coef[3 ], CLlm_1mm$ coef[3 ], CLlm_1z$ coef[3 ],
CLlm_2$ coef[2 ], CLlm_2mm$ coef[2 ], CLlm_2z$ coef[2 ],
CLlm_2$ coef[3 ], CLlm_2mm$ coef[3 ], CLlm_2z$ coef[3 ]
)
)
df_comp$ coef_1 <- df_comp$ coef / df_comp$ maxim
df_comp$ LL95_1 <- df_comp$ LL95 / df_comp$ maxim
df_comp$ UL95_1 <- df_comp$ UL95 / df_comp$ maxim
gg_effsizecomp <- ggplot (df_comp, aes (y = coef_1, ymin = LL95_1, ymax = UL95_1, x = Criterion, shape = Criterion, color = Criterion, fill = Criterion)) +
geom_crossbar (width = 0.9 , alpha = .25 ) +
geom_point (size = 2.25 ) +
facet_grid (Predictor ~ Standardization) +
scale_y_continuous (breaks = seq (0 , 1 , 0.25 ), limits = c (- 1.0 , 1.5 )) +
geom_hline (yintercept = 0 , linetype = "dashed" , color = "red" ) +
theme_soft () +
scale_color_viridis_d (option = "viridis" , begin = 0 , end = 0.65 ) +
xlab ("Dependent variable" ) +
ylab ("Regression coefficient, scaled to 0-1 range" ) +
theme (legend.position = "bottom" ) +
ggtitle ("Comparing effect sizes with different standardization methods" )
CLlm_t <- (lm (100 * CL_pcent ~ dyear, data = thetimes))
CLlm_1 <- (lm (100 * CL_pcent ~ Penetration + media_autonomy_i, data = thetimes))
CLlm_2 <- (lm (100 * CL_pcent ~ spending_GDP + gini_rev_i, data = thetimes))
CLlm_3 <- (lm (100 * CL_pcent ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i, data = thetimes))
CLlm_4 <- (lm (100 * CL_pcent ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG1000, data = thetimes))
CNWlm_0 <- (lm (100 * CNW_pcent ~ 1 , data = thetimes))
CNWlm_t <- (lm (100 * CNW_pcent ~ dyear, data = thetimes))
CNWlm_1 <- (lm (100 * CNW_pcent ~ Penetration + media_autonomy_i, data = thetimes))
CNWlm_2 <- (lm (100 * CNW_pcent ~ spending_GDP + gini_rev_i, data = thetimes))
CNWlm_3 <- (lm (100 * CNW_pcent ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i, data = thetimes))
CNWlm_4 <- (lm (100 * CNW_pcent ~ Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG1000, data = thetimes))
ggsave (gg_year_waves, file = "wave_count_4np.png" , unit = "cm" , width = 16 , height = 16 , dpi = 1200 , scale = 1.25 )
ggsave (gg_year_waveshare, file = "wave_share_4np.png" , unit = "cm" , width = 16 , height = 16 , dpi = 1200 , scale = 1.25 )
ggsave (gg_wave_duration, file = "wave_duration_4np.png" , unit = "cm" , width = 16 , height = 16 , dpi = 1200 , scale = 1.25 )
ggsave (gg_year_clshare, file = "cl_share_4np.png" , unit = "cm" , width = 16 , height = 16 , dpi = 1200 , scale = 1.25 )
ggsave (gg_year_clarticles, file = "cl_articles_4np.png" , unit = "cm" , width = 16 , height = 16 , dpi = 1200 , scale = 1.25 )
ggsave (gg_year_articles, file = "articles_4np.png" , unit = "cm" , width = 16 , height = 16 , dpi = 1200 , scale = 1.25 )
ggsave (gg_year_wavearticles, file = "wave_articles_4np.png" , unit = "cm" , width = 16 , height = 16 , dpi = 1200 , scale = 1.25 )
M1.X <- (lm (CL_pcent ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG1000 + dyear, data = thetimes))
M1.0 <- (lm (CL_pcent ~ 1 , data = M1.X$ model))
M1.1 <- (lm (CL_pcent ~ 1 + dyear, data = M1.X$ model))
M1.2 <- (lm (CL_pcent ~ 1 + Penetration + media_autonomy_i, data = M1.X$ model))
M1.3 <- (lm (CL_pcent ~ 1 + spending_GDP + gini_rev_i, data = M1.X$ model))
M1.4 <- (lm (CL_pcent ~ 1 + ccORG1000, data = M1.X$ model))
M1.5 <- (lm (CL_pcent ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i, data = M1.X$ model))
M1.6 <- (lm (CL_pcent ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG1000, data = M1.X$ model))
M1.7 <- (lm (CL_pcent ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG1000 + dyear, data = M1.X$ model))
M2.X <- (lm (CNW_pcent ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG1000 + dyear, data = thetimes))
M2.0 <- (lm (CNW_pcent ~ 1 , data = M2.X$ model))
M2.1 <- (lm (CNW_pcent ~ 1 + dyear, data = M2.X$ model))
M2.2 <- (lm (CNW_pcent ~ 1 + Penetration + media_autonomy_i, data = M2.X$ model))
M2.3 <- (lm (CNW_pcent ~ 1 + spending_GDP + gini_rev_i, data = M2.X$ model))
M2.4 <- (lm (CNW_pcent ~ 1 + ccORG1000, data = M2.X$ model))
M2.5 <- (lm (CNW_pcent ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i, data = M2.X$ model))
M2.6 <- (lm (CNW_pcent ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG1000, data = M2.X$ model))
M2.7 <- (lm (CNW_pcent ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG1000 + dyear, data = M2.X$ model))
M3.X <- (lm (logCNW ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG1000 + dyear, data = thetimes))
M3.0 <- (lm (logCNW ~ 1 , data = M3.X$ model))
M3.1 <- (lm (logCNW ~ 1 + dyear, data = M3.X$ model))
M3.2 <- (lm (logCNW ~ 1 + Penetration + media_autonomy_i, data = M3.X$ model))
M3.3 <- (lm (logCNW ~ 1 + spending_GDP + gini_rev_i, data = M3.X$ model))
M3.4 <- (lm (logCNW ~ 1 + ccORG1000, data = M3.X$ model))
M3.5 <- (lm (logCNW ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i, data = M3.X$ model))
M3.6 <- (lm (logCNW ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG1000, data = M3.X$ model))
M3.7 <- (lm (logCNW ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i + ccORG1000 + dyear, data = M3.X$ model))
M4.X <- (lm (ccORG1000 ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i + dyear, data = thetimes))
M4.0 <- (lm (ccORG1000 ~ 1 + Penetration + media_autonomy_i, data = M4.X$ model))
M4.1 <- (lm (ccORG1000 ~ 1 + spending_GDP + gini_rev_i, data = M4.X$ model))
M4.2 <- (lm (ccORG1000 ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i, data = M4.X$ model))
M4.3 <- (lm (ccORG1000 ~ 1 + Penetration + media_autonomy_i + spending_GDP + gini_rev_i + dyear, data = M4.X$ model))
anova (M1.0 , M1.1 )
anova (M1.0 , M1.4 )
anova (M1.0 , M1.2 , M1.5 , M1.6 , M1.7 )
anova (M1.0 , M1.3 , M1.5 , M1.6 , M1.7 )
M1_r2_all <- cbind (r2 (M1.0 ), r2 (M1.1 ), r2 (M1.2 ), r2 (M1.3 ), r2 (M1.4 ), r2 (M1.5 ), r2 (M1.6 ), r2 (M1.7 ))
M1_r2_ml_first <- cbind (r2 (M1.0 ), r2 (M1.2 ), r2 (M1.5 ), r2 (M1.6 ), r2 (M1.7 ))
M1_r2_gs_first <- cbind (r2 (M1.0 ), r2 (M1.3 ), r2 (M1.5 ), r2 (M1.6 ), r2 (M1.7 ))
stargazer (M1.0 , M1.2 , M1.5 , M1.6 , M1.7 , M1.1 , type = "text" , intercept.bottom = FALSE , star.cutoffs = c (.05 , .01 , .001 ), single.row = TRUE )
stargazer (M1.2 , M1.5 , M1.6 , M1.7 , M1.1 , type = "text" , intercept.bottom = FALSE , star.cutoffs = c (.05 , .01 , .001 ), single.row = TRUE )
stargazer (M1.2 , M1.5 , M1.6 , M1.7 , M1.1 , type = "latex" , intercept.bottom = FALSE , star.cutoffs = c (.05 , .01 , .001 ), single.row = FALSE )
anova (M2.0 , M2.1 )
anova (M2.0 , M2.4 )
anova (M2.0 , M2.2 , M2.5 , M2.6 , M2.7 )
anova (M2.0 , M2.3 , M2.5 , M2.6 , M2.7 )
M2_r2_all <- cbind (r2 (M2.0 ), r2 (M2.1 ), r2 (M2.2 ), r2 (M2.3 ), r2 (M2.4 ), r2 (M2.5 ), r2 (M2.6 ), r2 (M2.7 ))
M2_r2_ml_first <- cbind (r2 (M2.0 ), r2 (M2.2 ), r2 (M2.5 ), r2 (M2.6 ), r2 (M2.7 ))
M2_r2_gs_first <- cbind (r2 (M2.0 ), r2 (M2.3 ), r2 (M2.5 ), r2 (M2.6 ), r2 (M2.7 ))
stargazer (M2.0 , M2.2 , M2.5 , M2.6 , M2.7 , M2.1 , type = "text" , intercept.bottom = FALSE , star.cutoffs = c (.05 , .01 , .001 ), single.row = TRUE )
stargazer (M2.2 , M2.5 , M2.6 , M2.7 , M2.1 , type = "text" , intercept.bottom = FALSE , star.cutoffs = c (.05 , .01 , .001 ), single.row = TRUE )
stargazer (M2.2 , M2.5 , M2.6 , M2.7 , M2.1 , type = "latex" , intercept.bottom = FALSE , star.cutoffs = c (.05 , .01 , .001 ), single.row = FALSE )
anova (M3.0 , M3.1 )
anova (M3.0 , M3.4 )
anova (M3.0 , M3.2 , M3.5 , M3.6 , M3.7 )
anova (M3.0 , M3.3 , M3.5 , M3.6 , M3.7 )
M3_r2_all <- cbind (r2 (M3.0 ), r2 (M3.1 ), r2 (M3.2 ), r2 (M3.3 ), r2 (M3.4 ), r2 (M3.5 ), r2 (M3.6 ), r2 (M3.7 ))
M3_r2_ml_first <- cbind (r2 (M3.0 ), r2 (M3.2 ), r2 (M3.5 ), r2 (M3.6 ), r2 (M3.7 ))
M3_r2_gs_first <- cbind (r2 (M3.0 ), r2 (M3.3 ), r2 (M3.5 ), r2 (M3.6 ), r2 (M3.7 ))
stargazer (M3.0 , M3.2 , M3.5 , M3.6 , M3.7 , M3.1 , type = "text" , intercept.bottom = FALSE , star.cutoffs = c (.05 , .01 , .001 ), single.row = TRUE )
stargazer (M3.2 , M3.5 , M3.6 , M3.7 , M3.1 , type = "text" , intercept.bottom = FALSE , star.cutoffs = c (.05 , .01 , .001 ), single.row = TRUE )
stargazer (M1.5 , M1.7 , M2.5 , M2.7 , M3.5 , M3.7 , type = "text" , intercept.bottom = FALSE , star.cutoffs = c (.05 , .01 , .001 ), single.row = FALSE )
stargazer (M1.7 , M2.7 , M3.7 , type = "text" , intercept.bottom = FALSE , star.cutoffs = c (.05 , .01 , .001 ), single.row = FALSE )
stargazer (M3.2 , M3.5 , M3.6 , M3.7 , M3.1 , type = "latex" , intercept.bottom = FALSE , star.cutoffs = c (.05 , .01 , .001 ), single.row = FALSE )
etaSquared (M1.7 )
M1.8 _MP <- mediate (model.y = M1.6 , model.m = M4.2 , treat = "Penetration" , mediator = "ccORG1000" )
M1.8 _MA <- mediate (model.y = M1.6 , model.m = M4.2 , treat = "media_autonomy_i" , mediator = "ccORG1000" )
M1.8 _SI <- mediate (model.y = M1.6 , model.m = M4.2 , treat = "spending_GDP" , mediator = "ccORG1000" )
M1.8 _SD <- mediate (model.y = M1.6 , model.m = M4.2 , treat = "gini_rev_i" , mediator = "ccORG1000" )
df_mediation_M1 <- data.frame (
Criterion = c ("CL Salience" , "CL Salience" , "CL Salience" , "CL Salience" ),
Treatment = c ("Media Penetration" , "Media Autonomy" , "Spending Intensity" , "Spending Diversity" ),
Mediator = c ("ccORG1000" , "ccORG1000" , "ccORG1000" , "ccORG1000" ),
IE.est = c (M1.8 _MP$ d0, M1.8 _MA$ d0, M1.8 _SI$ d0, M1.8 _SD$ d0),
IE.LL95 = c (M1.8 _MP$ d0.ci[1 ], M1.8 _MA$ d0.ci[1 ], M1.8 _SI$ d0.ci[1 ], M1.8 _SD$ d0.ci[1 ]),
IE.UL95 = c (M1.8 _MP$ d0.ci[2 ], M1.8 _MA$ d0.ci[2 ], M1.8 _SI$ d0.ci[2 ], M1.8 _SD$ d0.ci[2 ]),
IE.p = c (M1.8 _MP$ d0.p, M1.8 _MA$ d0.p, M1.8 _SI$ d0.p, M1.8 _SD$ d0.p),
DE.est = c (M1.8 _MP$ z0, M1.8 _MA$ z0, M1.8 _SI$ z0, M1.8 _SD$ z0),
DE.LL95 = c (M1.8 _MP$ z0.ci[1 ], M1.8 _MA$ z0.ci[1 ], M1.8 _SI$ z0.ci[1 ], M1.8 _SD$ z0.ci[1 ]),
DE.UL95 = c (M1.8 _MP$ z0.ci[2 ], M1.8 _MA$ z0.ci[2 ], M1.8 _SI$ z0.ci[2 ], M1.8 _SD$ z0.ci[2 ]),
DE.p = c (M1.8 _MP$ z0.p, M1.8 _MA$ z0.p, M1.8 _SI$ z0.p, M1.8 _SD$ z0.p),
TE.est = c (M1.8 _MP$ tau.coef, M1.8 _MA$ tau.coef, M1.8 _SI$ tau.coef, M1.8 _SD$ tau.coef),
TE.LL95 = c (M1.8 _MP$ tau.ci[1 ], M1.8 _MA$ tau.ci[1 ], M1.8 _SI$ tau.ci[1 ], M1.8 _SD$ tau.ci[1 ]),
TE.UL95 = c (M1.8 _MP$ tau.ci[2 ], M1.8 _MA$ tau.ci[2 ], M1.8 _SI$ tau.ci[2 ], M1.8 _SD$ tau.ci[2 ]),
TE.p = c (M1.8 _MP$ tau.p, M1.8 _MA$ tau.p, M1.8 _SI$ tau.p, M1.8 _SD$ tau.p)
)
df_mediation_M2 <- data.frame (
Criterion = c ("CNW Salience" , "CNW Salience" , "CNW Salience" , "CNW Salience" ),
Treatment = c ("Media Penetration" , "Media Autonomy" , "Spending Intensity" , "Spending Diversity" ),
Mediator = c ("ccORG1000" , "ccORG1000" , "ccORG1000" , "ccORG1000" ),
IE.est = c (M2.8 _MP$ d0, M2.8 _MA$ d0, M2.8 _SI$ d0, M2.8 _SD$ d0),
IE.LL95 = c (M2.8 _MP$ d0.ci[1 ], M2.8 _MA$ d0.ci[1 ], M2.8 _SI$ d0.ci[1 ], M2.8 _SD$ d0.ci[1 ]),
IE.UL95 = c (M2.8 _MP$ d0.ci[2 ], M2.8 _MA$ d0.ci[2 ], M2.8 _SI$ d0.ci[2 ], M2.8 _SD$ d0.ci[2 ]),
IE.p = c (M2.8 _MP$ d0.p, M2.8 _MA$ d0.p, M2.8 _SI$ d0.p, M2.8 _SD$ d0.p),
DE.est = c (M2.8 _MP$ z0, M2.8 _MA$ z0, M2.8 _SI$ z0, M2.8 _SD$ z0),
DE.LL95 = c (M2.8 _MP$ z0.ci[1 ], M2.8 _MA$ z0.ci[1 ], M2.8 _SI$ z0.ci[1 ], M2.8 _SD$ z0.ci[1 ]),
DE.UL95 = c (M2.8 _MP$ z0.ci[2 ], M2.8 _MA$ z0.ci[2 ], M2.8 _SI$ z0.ci[2 ], M2.8 _SD$ z0.ci[2 ]),
DE.p = c (M2.8 _MP$ z0.p, M2.8 _MA$ z0.p, M2.8 _SI$ z0.p, M2.8 _SD$ z0.p),
TE.est = c (M2.8 _MP$ tau.coef, M2.8 _MA$ tau.coef, M2.8 _SI$ tau.coef, M2.8 _SD$ tau.coef),
TE.LL95 = c (M2.8 _MP$ tau.ci[1 ], M2.8 _MA$ tau.ci[1 ], M2.8 _SI$ tau.ci[1 ], M2.8 _SD$ tau.ci[1 ]),
TE.UL95 = c (M2.8 _MP$ tau.ci[2 ], M2.8 _MA$ tau.ci[2 ], M2.8 _SI$ tau.ci[2 ], M2.8 _SD$ tau.ci[2 ]),
TE.p = c (M2.8 _MP$ tau.p, M2.8 _MA$ tau.p, M2.8 _SI$ tau.p, M2.8 _SD$ tau.p)
)
df_mediation_M3 <- data.frame (
Criterion = c ("CNW Count" , "CNW Count" , "CNW Count" , "CNW Count" ),
Treatment = c ("Media Penetration" , "Media Autonomy" , "Spending Intensity" , "Spending Diversity" ),
Mediator = c ("ccORG1000" , "ccORG1000" , "ccORG1000" , "ccORG1000" ),
IE.est = c (M3.8 _MP$ d0, M3.8 _MA$ d0, M3.8 _SI$ d0, M3.8 _SD$ d0),
IE.LL95 = c (M3.8 _MP$ d0.ci[1 ], M3.8 _MA$ d0.ci[1 ], M3.8 _SI$ d0.ci[1 ], M3.8 _SD$ d0.ci[1 ]),
IE.UL95 = c (M3.8 _MP$ d0.ci[2 ], M3.8 _MA$ d0.ci[2 ], M3.8 _SI$ d0.ci[2 ], M3.8 _SD$ d0.ci[2 ]),
IE.p = c (M3.8 _MP$ d0.p, M3.8 _MA$ d0.p, M3.8 _SI$ d0.p, M3.8 _SD$ d0.p),
DE.est = c (M3.8 _MP$ z0, M3.8 _MA$ z0, M3.8 _SI$ z0, M3.8 _SD$ z0),
DE.LL95 = c (M3.8 _MP$ z0.ci[1 ], M3.8 _MA$ z0.ci[1 ], M3.8 _SI$ z0.ci[1 ], M3.8 _SD$ z0.ci[1 ]),
DE.UL95 = c (M3.8 _MP$ z0.ci[2 ], M3.8 _MA$ z0.ci[2 ], M3.8 _SI$ z0.ci[2 ], M3.8 _SD$ z0.ci[2 ]),
DE.p = c (M3.8 _MP$ z0.p, M3.8 _MA$ z0.p, M3.8 _SI$ z0.p, M3.8 _SD$ z0.p),
TE.est = c (M3.8 _MP$ tau.coef, M3.8 _MA$ tau.coef, M3.8 _SI$ tau.coef, M3.8 _SD$ tau.coef),
TE.LL95 = c (M3.8 _MP$ tau.ci[1 ], M3.8 _MA$ tau.ci[1 ], M3.8 _SI$ tau.ci[1 ], M3.8 _SD$ tau.ci[1 ]),
TE.UL95 = c (M3.8 _MP$ tau.ci[2 ], M3.8 _MA$ tau.ci[2 ], M3.8 _SI$ tau.ci[2 ], M3.8 _SD$ tau.ci[2 ]),
TE.p = c (M3.8 _MP$ tau.p, M3.8 _MA$ tau.p, M3.8 _SI$ tau.p, M3.8 _SD$ tau.p)
)
df_mediation <- rbind (df_mediation_M1, df_mediation_M2, df_mediation_M3)
df_mediation_long <- data.frame (
Criterion = factor (rep (df_mediation$ Criterion, times = 3 ), ordered = TRUE , levels = c ("CL Salience" , "CNW Salience" , "CNW Count" )),
Treatment = factor (rep (df_mediation$ Treatment, times = 3 ), ordered = TRUE , levels = c ("Media Penetration" , "Media Autonomy" , "Spending Intensity" , "Spending Diversity" )),
Mediator = rep (df_mediation$ Mediator, times = 3 ),
Effect = factor (rep (c ("Indirect" , "Direct" , "Total" ), each = 12 ), ordered = TRUE , levels = c ("Indirect" , "Direct" , "Total" )),
Est = c (df_mediation$ IE.est, df_mediation$ DE.est, df_mediation$ TE.est),
LL95 = c (df_mediation$ IE.LL95, df_mediation$ DE.LL95, df_mediation$ TE.LL95),
UL95 = c (df_mediation$ IE.UL95, df_mediation$ DE.UL95, df_mediation$ TE.UL95)
)
ggplot (df_mediation_long, aes (ymin = LL95, ymax = UL95, y = Est, x = Criterion, group = Effect, color = Effect, shape = Effect)) +
geom_hline (yintercept = 0 , linetype = "dashed" ) +
geom_pointrange (position = position_dodge (0.75 )) +
facet_grid (Treatment ~ .) +
coord_flip () +
theme_soft () +
scale_color_viridis_d (begin = 0 , end = .85 )
gg_mediation <- ggplot (df_mediation_long, aes (ymin = LL95, ymax = UL95, y = Est, x = Criterion, group = Effect, color = Effect, shape = Effect)) +
geom_hline (yintercept = 0 , linetype = "dashed" ) +
geom_pointrange (position = position_dodge (0.75 )) +
facet_grid (Treatment ~ ., scales = "free_y" ) +
theme_soft () +
scale_color_viridis_d (begin = 0 , end = .7 ) +
ylab ("Estimate" )
gg_mediation <- ggplot (df_mediation_long, aes (ymin = LL95, ymax = UL95, y = Est, x = Effect, group = Effect, color = Effect, shape = Effect)) +
geom_hline (yintercept = 0 , linetype = "dashed" ) +
geom_errorbar (width = 0.9 , position = position_dodge (0.75 )) +
geom_point (size = 2.0 ) +
facet_grid (Treatment ~ Criterion, scales = "free_y" ) +
theme_soft () +
scale_color_viridis_d (begin = 0 , end = .7 ) +
ylab ("Estimate" ) +
theme (legend.position = "bottom" ) +
ggtitle ("Direct, indirect and total effects of predictors" )
gg_mediation_effsizecomp <- grid.arrange (gg_mediation, gg_effsizecomp, ncol = 2 )
ggsave (gg_mediation, file = "mediation.png" , unit = "cm" , width = 12 , height = 12 , dpi = 1200 , scale = 1.25 )
ggsave (gg_mediation_effsizecomp, file = "mediation_effsizecomp.png" , unit = "cm" , width = 16 , height = 16 , dpi = 1200 , scale = 1.85 )
M2.8 _MP <- mediate (model.y = M2.6 , model.m = M4.2 , treat = "Penetration" , mediator = "ccORG1000" )
M2.8 _MA <- mediate (model.y = M2.6 , model.m = M4.2 , treat = "media_autonomy_i" , mediator = "ccORG1000" )
M2.8 _SI <- mediate (model.y = M2.6 , model.m = M4.2 , treat = "spending_GDP" , mediator = "ccORG1000" )
M2.8 _SD <- mediate (model.y = M2.6 , model.m = M4.2 , treat = "gini_rev_i" , mediator = "ccORG1000" )
M3.8 _MP <- mediate (model.y = M3.6 , model.m = M4.2 , treat = "Penetration" , mediator = "ccORG1000" )
M3.8 _MA <- mediate (model.y = M3.6 , model.m = M4.2 , treat = "media_autonomy_i" , mediator = "ccORG1000" )
M3.8 _SI <- mediate (model.y = M3.6 , model.m = M4.2 , treat = "spending_GDP" , mediator = "ccORG1000" )
M3.8 _SD <- mediate (model.y = M3.6 , model.m = M4.2 , treat = "gini_rev_i" , mediator = "ccORG1000" )
med_penetration3 <- mediate (model.m = ORGlm_3, model.y = COUNTlm_3Y, mediator = "ccORG1000" , treat = "Penetration" )
med_autonomy3 <- mediate (model.m = ORGlm_3, model.y = COUNTlm_3Y, mediator = "ccORG1000" , treat = "media_autonomy_i" )
med_spending3 <- mediate (model.m = ORGlm_3, model.y = COUNTlm_3Y, mediator = "ccORG1000" , treat = "spending_GDP" )
med_diversity3 <- mediate (model.m = ORGlm_3, model.y = COUNTlm_3Y, mediator = "ccORG1000" , treat = "gini_rev_i" )
med_penetration3X <- mediate (model.m = ORGlm_1, model.y = COUNTlm_3X, mediator = "ccORG1000" , treat = "Penetration" )
med_autonomy3X <- mediate (model.m = ORGlm_1, model.y = COUNTlm_3X, mediator = "ccORG1000" , treat = "media_autonomy_i" )
med_spending3X <- mediate (model.m = ORGlm_2, model.y = COUNTlm_3X, mediator = "ccORG1000" , treat = "spending_GDP" )
med_diversity3X <- mediate (model.m = ORGlm_2, model.y = COUNTlm_3X, mediator = "ccORG1000" , treat = "gini_rev_i" )